From fae1f0fb2d6b0b3944b2d7048221272388474136 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Thu, 9 Feb 2023 19:34:29 -0700 Subject: [PATCH 01/63] containers: add syncmap --- util/containers/syncmap.go | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 util/containers/syncmap.go diff --git a/util/containers/syncmap.go b/util/containers/syncmap.go new file mode 100644 index 0000000000..7952a32252 --- /dev/null +++ b/util/containers/syncmap.go @@ -0,0 +1,24 @@ +package containers + +import "sync" + +type SyncMap[K any, V any] struct { + internal sync.Map +} + +func (m *SyncMap[K, V]) Load(key K) (V, bool) { + val, found := m.internal.Load(key) + if !found { + var empty V + return empty, false + } + return val.(V), true +} + +func (m *SyncMap[K, V]) Store(key K, val V) { + m.internal.Store(key, val) +} + +func (m *SyncMap[K, V]) Delete(key K) { + m.internal.Delete(key) +} From 86a734e9a65e4b3d08619540322db8f6dcee5b15 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Thu, 9 Feb 2023 19:35:00 -0700 Subject: [PATCH 02/63] stopwaiter: optimize iterative 0-time call --- util/stopwaiter/stopwaiter.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/util/stopwaiter/stopwaiter.go b/util/stopwaiter/stopwaiter.go index 325a843559..0c0b9f44a1 100644 --- a/util/stopwaiter/stopwaiter.go +++ b/util/stopwaiter/stopwaiter.go @@ -196,6 +196,9 @@ func (s *StopWaiterSafe) CallIteratively(foo func(context.Context) time.Duration if ctx.Err() != nil { return } + if interval == time.Duration(0) { + continue + } timer := time.NewTimer(interval) select { case <-ctx.Done(): From 3438753217ff9e748945518bee86def8fc0bc96b Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Thu, 9 Feb 2023 19:39:51 -0700 Subject: [PATCH 03/63] initial separation of execution from validation --- arbnode/api.go | 29 +- arbnode/execution/block_recorder.go | 357 +++++++ arbnode/execution/executionengine.go | 103 +- arbnode/execution/node.go | 3 + arbnode/inbox_test.go | 13 +- arbnode/inbox_tracker.go | 12 - arbnode/node.go | 10 +- arbnode/sync_monitor.go | 8 +- arbnode/transaction_streamer.go | 40 +- staker/block_challenge_backend.go | 61 +- staker/block_validator.go | 1142 ++++++++++------------ staker/block_validator_schema.go | 25 +- staker/challenge_manager.go | 46 +- staker/l1_validator.go | 362 +++---- staker/staker.go | 4 +- staker/stateless_block_validator.go | 405 +++----- system_tests/block_validator_test.go | 7 +- system_tests/full_challenge_impl_test.go | 8 +- system_tests/seqinbox_test.go | 8 +- system_tests/staker_test.go | 6 +- system_tests/twonodeslong_test.go | 3 +- 21 files changed, 1285 insertions(+), 1367 deletions(-) create mode 100644 arbnode/execution/block_recorder.go diff --git a/arbnode/api.go b/arbnode/api.go index 8d8516dda0..27ddc45f20 100644 --- a/arbnode/api.go +++ b/arbnode/api.go @@ -8,8 +8,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethereum/go-ethereum/core" - "github.com/ethereum/go-ethereum/core/types" - "github.com/ethereum/go-ethereum/rpc" + "github.com/offchainlabs/nitro/arbutil" "github.com/offchainlabs/nitro/staker" "github.com/pkg/errors" ) @@ -18,14 +17,8 @@ type BlockValidatorAPI struct { val *staker.BlockValidator } -func (a *BlockValidatorAPI) LatestValidatedBlock(ctx context.Context) (hexutil.Uint64, error) { - block := a.val.LastBlockValidated() - return hexutil.Uint64(block), nil -} - -func (a *BlockValidatorAPI) LatestValidatedBlockHash(ctx context.Context) (common.Hash, error) 
{ - _, hash, _ := a.val.LastBlockValidatedAndHash() - return hash, nil +func (a *BlockValidatorAPI) LatestValidatedMsgNum(ctx context.Context) (*staker.GlobalStateValidatedInfo, error) { + return a.val.ReadLastValidatedInfo() } type BlockValidatorDebugAPI struct { @@ -38,21 +31,11 @@ type ValidateBlockResult struct { Latency string `json:"latency"` } -func (a *BlockValidatorDebugAPI) ValidateBlock( - ctx context.Context, blockNum rpc.BlockNumber, full bool, moduleRootOptional *common.Hash, +func (a *BlockValidatorDebugAPI) ValidateMessageNumber( + ctx context.Context, msgNum hexutil.Uint64, full bool, moduleRootOptional *common.Hash, ) (ValidateBlockResult, error) { result := ValidateBlockResult{} - if blockNum < 0 { - return result, errors.New("this method only accepts absolute block numbers") - } - header := a.blockchain.GetHeaderByNumber(uint64(blockNum)) - if header == nil { - return result, errors.New("block not found") - } - if !a.blockchain.Config().IsArbitrumNitro(header.Number) { - return result, types.ErrUseFallback - } var moduleRoot common.Hash if moduleRootOptional != nil { moduleRoot = *moduleRootOptional @@ -64,7 +47,7 @@ func (a *BlockValidatorDebugAPI) ValidateBlock( moduleRoot = moduleRoots[0] } start_time := time.Now() - valid, err := a.val.ValidateBlock(ctx, header, full, moduleRoot) + valid, err := a.val.ValidateBlock(ctx, arbutil.MessageIndex(msgNum), full, moduleRoot) result.Valid = valid result.Latency = fmt.Sprintf("%vms", time.Since(start_time).Milliseconds()) return result, err diff --git a/arbnode/execution/block_recorder.go b/arbnode/execution/block_recorder.go new file mode 100644 index 0000000000..b1f9997279 --- /dev/null +++ b/arbnode/execution/block_recorder.go @@ -0,0 +1,357 @@ +package execution + +import ( + "context" + "fmt" + "sync" + "testing" + + "github.com/ethereum/go-ethereum/arbitrum" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" + "github.com/offchainlabs/nitro/arbos" + "github.com/offchainlabs/nitro/arbos/arbosState" + "github.com/offchainlabs/nitro/arbos/arbostypes" + "github.com/offchainlabs/nitro/arbutil" + "github.com/offchainlabs/nitro/validator" +) + +// BlockRecorder uses a separate statedatabase from the blockchain. +// It has access to any state in the HD database, and can compute state as needed. +// We keep references for state of: +// Any block that matches PrepareForRecord that was done recently (according to PrepareDelay config) +// Most recent/advanced header we ever omputed (lastHdr) +// Hopefully - some recent valid block. For that we always keep one candidate block until it becomes validated. 
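+// Each header kept here holds a reference in the RecordingDatabase; the reference
+// is released via Dereference when the header is replaced, trimmed, or reorged out.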
+type BlockRecorder struct { + recordingDatabase *arbitrum.RecordingDatabase + execEngine *ExecutionEngine + + lastHdr *types.Header + lastHdrLock sync.Mutex + + validHdrCandidate *types.Header + validHdr *types.Header + validHdrLock sync.Mutex + + preparedQueue []*types.Header + preparedLock sync.Mutex +} + +type RecordResult struct { + Pos arbutil.MessageIndex + BlockHash common.Hash + Preimages map[common.Hash][]byte + BatchInfo []validator.BatchInfo +} + +func NewBlockRecorder(execEngine *ExecutionEngine, ethDb ethdb.Database) *BlockRecorder { + return &BlockRecorder{ + execEngine: execEngine, + recordingDatabase: arbitrum.NewRecordingDatabase(ethDb, execEngine.bc), + } +} + +func stateLogFunc(targetHeader, header *types.Header, hasState bool) { + if targetHeader == nil || header == nil { + return + } + gap := targetHeader.Number.Int64() - header.Number.Int64() + step := int64(500) + stage := "computing state" + if !hasState { + step = 3000 + stage = "looking for full block" + } + if (gap >= step) && (gap%step == 0) { + log.Info("Setting up validation", "stage", stage, "current", header.Number, "target", targetHeader.Number) + } +} + +// If msg is nil, this will record block creation up to the point where message would be accessed (for a "too far" proof) +// If keepreference == true, reference to state of prevHeader is added (no reference added if an error is returned) +func (r *BlockRecorder) RecordBlockCreation( + ctx context.Context, + pos arbutil.MessageIndex, + msg *arbostypes.MessageWithMetadata, +) (*RecordResult, error) { + + blockNum := r.execEngine.MessageIndexToBlockNumber(pos) + + var prevHeader *types.Header + if pos != 0 { + prevHeader = r.execEngine.bc.GetHeaderByNumber(uint64(blockNum - 1)) + if prevHeader == nil { + return nil, fmt.Errorf("pos %d prevHeader not found", pos) + } + } + + recordingdb, chaincontext, recordingKV, err := r.recordingDatabase.PrepareRecording(ctx, prevHeader, stateLogFunc) + if err != nil { + return nil, err + } + defer func() { r.recordingDatabase.Dereference(prevHeader) }() + + chainConfig := r.execEngine.bc.Config() + + // Get the chain ID, both to validate and because the replay binary also gets the chain ID, + // so we need to populate the recordingdb with preimages for retrieving the chain ID. 
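+ // These checks are skipped for the first block, which has no previous ArbOS state to read.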
+ if prevHeader != nil { + initialArbosState, err := arbosState.OpenSystemArbosState(recordingdb, nil, true) + if err != nil { + return nil, fmt.Errorf("error opening initial ArbOS state: %w", err) + } + chainId, err := initialArbosState.ChainId() + if err != nil { + return nil, fmt.Errorf("error getting chain ID from initial ArbOS state: %w", err) + } + if chainId.Cmp(chainConfig.ChainID) != 0 { + return nil, fmt.Errorf("unexpected chain ID %r in ArbOS state, expected %r", chainId, chainConfig.ChainID) + } + genesisNum, err := initialArbosState.GenesisBlockNum() + if err != nil { + return nil, fmt.Errorf("error getting genesis block number from initial ArbOS state: %w", err) + } + expectedNum := chainConfig.ArbitrumChainParams.GenesisBlockNum + if genesisNum != expectedNum { + return nil, fmt.Errorf("unexpected genesis block number %v in ArbOS state, expected %v", genesisNum, expectedNum) + } + } + + var blockHash common.Hash + var readBatchInfo []validator.BatchInfo + if msg != nil { + batchFetcher := func(batchNum uint64) ([]byte, error) { + data, err := r.execEngine.streamer.FetchBatch(batchNum) + if err != nil { + return nil, err + } + readBatchInfo = append(readBatchInfo, validator.BatchInfo{ + Number: batchNum, + Data: data, + }) + return data, nil + } + // Re-fetch the batch instead of using our cached cost, + // as the replay binary won't have the cache populated. + msg.Message.BatchGasCost = nil + block, _, err := arbos.ProduceBlock( + msg.Message, + msg.DelayedMessagesRead, + prevHeader, + recordingdb, + chaincontext, + chainConfig, + batchFetcher, + ) + if err != nil { + return nil, err + } + blockHash = block.Hash() + } + + preimages, err := r.recordingDatabase.PreimagesFromRecording(chaincontext, recordingKV) + if err != nil { + return nil, err + } + + // check we got the canonical hash + canonicalHash := r.execEngine.bc.GetCanonicalHash(uint64(blockNum)) + if canonicalHash != blockHash { + return nil, fmt.Errorf("Blockhash doesn't match when recording got %v canonical %v", blockHash, canonicalHash) + } + + // these won't usually do much here (they will in preparerecording), but doesn't hurt to check + r.updateLastHdr(prevHeader) + r.updateValidCandidateHdr(prevHeader) + + return &RecordResult{pos, blockHash, preimages, readBatchInfo}, err +} + +func (r *BlockRecorder) updateLastHdr(hdr *types.Header) { + if hdr == nil { + return + } + r.lastHdrLock.Lock() + defer r.lastHdrLock.Unlock() + if r.lastHdr != nil { + if hdr.Number.Cmp(r.lastHdr.Number) <= 0 { + return + } + } + _, err := r.recordingDatabase.StateFor(hdr) + if err != nil { + log.Warn("failed to get state in updateLastHdr", "err", err) + return + } + r.recordingDatabase.Dereference(r.lastHdr) + r.lastHdr = hdr +} + +func (r *BlockRecorder) updateValidCandidateHdr(hdr *types.Header) { + if hdr == nil { + return + } + r.validHdrLock.Lock() + defer r.validHdrLock.Unlock() + // don't need a candidate that's newer than the current one (else it will never become valid) + if r.validHdrCandidate != nil && r.validHdrCandidate.Number.Cmp(hdr.Number) <= 0 { + return + } + // don't need a candidate that's older than known valid + if r.validHdr != nil && r.validHdr.Number.Cmp(hdr.Number) >= 0 { + return + } + _, err := r.recordingDatabase.StateFor(hdr) + if err != nil { + log.Warn("failed to get state in updateLastHdr", "err", err) + return + } + r.validHdrCandidate = hdr +} + +func (r *BlockRecorder) MarkValid(pos arbutil.MessageIndex, resultHash common.Hash) { + r.validHdrLock.Lock() + defer r.validHdrLock.Unlock() + if 
r.validHdrCandidate == nil { + return + } + validNum := r.execEngine.MessageIndexToBlockNumber(pos) + if r.validHdrCandidate.Number.Uint64() > validNum { + return + } + // make sure the valid is canonical + canonicalResultHash := r.execEngine.bc.GetCanonicalHash(uint64(validNum)) + if canonicalResultHash != resultHash { + log.Warn("markvalid hash not canonical", "pos", pos, "result", resultHash, "canonical", canonicalResultHash) + r.validHdrCandidate = nil + return + } + // make sure the candidate is still canonical + canonicalHash := r.execEngine.bc.GetCanonicalHash(r.validHdrCandidate.Number.Uint64()) + candidateHash := r.validHdrCandidate.Hash() + if canonicalHash != candidateHash { + log.Error("vlid candidate hash not canonical", "number", r.validHdrCandidate.Number, "candidate", candidateHash, "canonical", canonicalHash) + r.validHdrCandidate = nil + return + } + r.recordingDatabase.Dereference(r.validHdr) + r.validHdr = r.validHdrCandidate + r.validHdrCandidate = nil +} + +// TODO: use config +func (r *BlockRecorder) preparedAddTrim(newRefs []*types.Header, size int) { + var oldRefs []*types.Header + r.preparedLock.Lock() + r.preparedQueue = append(r.preparedQueue, newRefs...) + if len(r.preparedQueue) > size { + oldRefs = r.preparedQueue[:len(r.preparedQueue)-size] + r.preparedQueue = r.preparedQueue[len(r.preparedQueue)-size:] + } + r.preparedLock.Unlock() + for _, ref := range oldRefs { + r.recordingDatabase.Dereference(ref) + } +} + +func (r *BlockRecorder) preparedTrimBeyond(hdr *types.Header) { + var oldRefs []*types.Header + var validRefs []*types.Header + r.preparedLock.Lock() + for _, queHdr := range r.preparedQueue { + if queHdr.Number.Cmp(hdr.Number) > 0 { + oldRefs = append(oldRefs, queHdr) + } else { + validRefs = append(validRefs, queHdr) + } + } + r.preparedQueue = validRefs + r.preparedLock.Unlock() + for _, ref := range oldRefs { + r.recordingDatabase.Dereference(ref) + } +} + +func (r *BlockRecorder) TrimAllPrepared(t *testing.T) { + r.preparedAddTrim(nil, 0) +} + +func (r *BlockRecorder) RecordingDBReferenceCount() int64 { + return r.recordingDatabase.ReferenceCount() +} + +func (r *BlockRecorder) PrepareForRecord(ctx context.Context, start, end arbutil.MessageIndex) error { + var references []*types.Header + if end < start { + return fmt.Errorf("illegal range start %d > end %d", start, end) + } + numOfBlocks := uint64(end - start) + hdrNum := r.execEngine.MessageIndexToBlockNumber(start) + if start > 0 { + hdrNum-- // need to get previous + } else { + numOfBlocks-- // genesis block doesn't need preparation, so recording one less block + } + lastHdrNum := hdrNum + numOfBlocks + var header *types.Header + for hdrNum < lastHdrNum { + newHeader := r.execEngine.bc.GetHeaderByNumber(uint64(hdrNum)) + if newHeader == nil { + log.Warn("prepareblocks asked for non-found block", "hdrNum", hdrNum) + break + } + _, err := r.recordingDatabase.GetOrRecreateState(ctx, newHeader, stateLogFunc) + if err != nil { + log.Warn("prepareblocks failed to get state for block", "hdrNum", hdrNum, "err", err) + break + } + header = newHeader + references = append(references, header) + r.updateValidCandidateHdr(header) + hdrNum++ + } + r.updateLastHdr(header) + r.preparedAddTrim(references, 1000) + return nil +} + +func (r *BlockRecorder) ReorgTo(hdr *types.Header) { + r.validHdrLock.Lock() + if r.validHdr.Number.Cmp(hdr.Number) > 0 { + log.Error("block recorder: reorging past previously-marked final block", "reorg target num", hdr.Number, "hash", hdr.Hash(), "reorged past num", 
r.validHdr.Number, "hash", r.validHdr.Hash()) + r.recordingDatabase.Dereference(r.validHdr) + r.validHdr = nil + } + if r.validHdrCandidate.Number.Cmp(hdr.Number) > 0 { + r.recordingDatabase.Dereference(r.validHdrCandidate) + r.validHdrCandidate = nil + } + r.validHdrLock.Unlock() + r.lastHdrLock.Lock() + if r.lastHdr.Number.Cmp(hdr.Number) > 0 { + r.recordingDatabase.Dereference(r.lastHdr) + r.lastHdr = nil + } + r.lastHdrLock.Unlock() + r.preparedTrimBeyond(hdr) +} + +func (r *BlockRecorder) WriteValidStateToDb() error { + r.validHdrLock.Lock() + defer r.validHdrLock.Unlock() + if r.validHdr == nil { + return nil + } + err := r.recordingDatabase.WriteStateToDatabase(r.validHdr) + r.recordingDatabase.Dereference(r.validHdr) + return err +} + +func (r *BlockRecorder) OrderlyShutdown() { + err := r.WriteValidStateToDb() + if err != nil { + log.Error("failed writing latest valid block state to DB", "err", err) + } +} diff --git a/arbnode/execution/executionengine.go b/arbnode/execution/executionengine.go index de89b2f2e9..842770dbf5 100644 --- a/arbnode/execution/executionengine.go +++ b/arbnode/execution/executionengine.go @@ -8,6 +8,7 @@ import ( "testing" "time" + "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core" "github.com/ethereum/go-ethereum/core/state" "github.com/ethereum/go-ethereum/core/types" @@ -18,7 +19,6 @@ import ( "github.com/offchainlabs/nitro/arbos/arbostypes" "github.com/offchainlabs/nitro/arbos/l1pricing" "github.com/offchainlabs/nitro/arbutil" - "github.com/offchainlabs/nitro/staker" "github.com/offchainlabs/nitro/util/sharedmetrics" "github.com/offchainlabs/nitro/util/stopwaiter" "github.com/pkg/errors" @@ -33,9 +33,8 @@ type TransactionStreamerInterface interface { type ExecutionEngine struct { stopwaiter.StopWaiter - bc *core.BlockChain - validator *staker.BlockValidator - streamer TransactionStreamerInterface + bc *core.BlockChain + streamer TransactionStreamerInterface resequenceChan chan []*arbostypes.MessageWithMetadata createBlocksMutex sync.Mutex @@ -57,16 +56,6 @@ func NewExecutionEngine(bc *core.BlockChain) (*ExecutionEngine, error) { }, nil } -func (s *ExecutionEngine) SetBlockValidator(validator *staker.BlockValidator) { - if s.Started() { - panic("trying to set block validator after start") - } - if s.validator != nil { - panic("trying to set block validator when already set") - } - s.validator = validator -} - func (s *ExecutionEngine) SetReorgSequencingPolicy(reorgSequencing func() *arbos.SequencingHooks) { if s.Started() { panic("trying to set reorg sequencing policy after start") @@ -88,6 +77,9 @@ func (s *ExecutionEngine) SetTransactionStreamer(streamer TransactionStreamerInt } func (s *ExecutionEngine) Reorg(count arbutil.MessageIndex, newMessages []arbostypes.MessageWithMetadata, oldMessages []*arbostypes.MessageWithMetadata) error { + if count == 0 { + return errors.New("cannot reorg out genesis") + } s.createBlocksMutex.Lock() successful := false defer func() { @@ -95,24 +87,15 @@ func (s *ExecutionEngine) Reorg(count arbutil.MessageIndex, newMessages []arbost s.createBlocksMutex.Unlock() } }() - blockNum, err := s.MessageCountToBlockNumber(count) - if err != nil { - return err - } + blockNum := s.MessageIndexToBlockNumber(count - 1) // We can safely cast blockNum to a uint64 as we checked count == 0 above targetBlock := s.bc.GetBlockByNumber(uint64(blockNum)) if targetBlock == nil { log.Warn("reorg target block not found", "block", blockNum) return nil } - if s.validator != nil { - err = 
s.validator.ReorgToBlock(targetBlock.NumberU64(), targetBlock.Hash()) - if err != nil { - return err - } - } - err = s.bc.ReorgToOldBlock(targetBlock) + err := s.bc.ReorgToOldBlock(targetBlock) if err != nil { return err } @@ -128,11 +111,7 @@ func (s *ExecutionEngine) Reorg(count arbutil.MessageIndex, newMessages []arbost } func (s *ExecutionEngine) HeadMessageNumber() (arbutil.MessageIndex, error) { - msgCount, err := s.BlockNumberToMessageCount(s.bc.CurrentBlock().Header().Number.Uint64()) - if err != nil { - return 0, err - } - return msgCount - 1, err + return s.BlockNumberToMessageIndex(s.bc.CurrentBlock().Header().Number.Uint64()) } func (s *ExecutionEngine) HeadMessageNumberSync(t *testing.T) (arbutil.MessageIndex, error) { @@ -302,7 +281,7 @@ func (s *ExecutionEngine) sequenceTransactionsWithBlockMutex(header *arbostypes. DelayedMessagesRead: delayedMessagesRead, } - pos, err := s.BlockNumberToMessageCount(lastBlockHeader.Number.Uint64()) + pos, err := s.BlockNumberToMessageIndex(lastBlockHeader.Number.Uint64() + 1) if err != nil { return nil, err } @@ -319,31 +298,23 @@ func (s *ExecutionEngine) sequenceTransactionsWithBlockMutex(header *arbostypes. return nil, err } - if s.validator != nil { - s.validator.NewBlock(block, lastBlockHeader, msgWithMeta) - } - return block, nil } -func (s *ExecutionEngine) GetGenesisBlockNumber() (uint64, error) { - return s.bc.Config().ArbitrumChainParams.GenesisBlockNum, nil +func (s *ExecutionEngine) GetGenesisBlockNumber() uint64 { + return s.bc.Config().ArbitrumChainParams.GenesisBlockNum } -func (s *ExecutionEngine) BlockNumberToMessageCount(blockNum uint64) (arbutil.MessageIndex, error) { - genesis, err := s.GetGenesisBlockNumber() - if err != nil { - return 0, err +func (s *ExecutionEngine) BlockNumberToMessageIndex(blockNum uint64) (arbutil.MessageIndex, error) { + genesis := s.GetGenesisBlockNumber() + if blockNum < genesis { + return 0, fmt.Errorf("blockNum %d < genesis %d", blockNum, genesis) } - return arbutil.BlockNumberToMessageCount(blockNum, genesis), nil + return arbutil.MessageIndex(blockNum - genesis), nil } -func (s *ExecutionEngine) MessageCountToBlockNumber(messageNum arbutil.MessageIndex) (int64, error) { - genesis, err := s.GetGenesisBlockNumber() - if err != nil { - return 0, err - } - return arbutil.MessageCountToBlockNumber(messageNum, genesis), nil +func (s *ExecutionEngine) MessageIndexToBlockNumber(messageNum arbutil.MessageIndex) uint64 { + return uint64(messageNum) + s.GetGenesisBlockNumber() } func (s *ExecutionEngine) SequenceDelayedMessage(message *arbostypes.L1IncomingMessage, delayedSeqNum uint64) error { @@ -363,7 +334,7 @@ func (s *ExecutionEngine) sequenceDelayedMessageWithBlockMutex(message *arbostyp expectedDelayed := currentHeader.Nonce.Uint64() - pos, err := s.BlockNumberToMessageCount(currentHeader.Number.Uint64()) + lastMsg, err := s.BlockNumberToMessageIndex(currentHeader.Number.Uint64()) if err != nil { return err } @@ -377,7 +348,7 @@ func (s *ExecutionEngine) sequenceDelayedMessageWithBlockMutex(message *arbostyp DelayedMessagesRead: delayedSeqNum + 1, } - err = s.streamer.WriteMessageFromSequencer(pos, messageWithMeta) + err = s.streamer.WriteMessageFromSequencer(lastMsg+1, messageWithMeta) if err != nil { return err } @@ -393,7 +364,7 @@ func (s *ExecutionEngine) sequenceDelayedMessageWithBlockMutex(message *arbostyp return err } - log.Info("ExecutionEngine: Added DelayedMessages", "pos", pos, "delayed", delayedSeqNum, "block-header", block.Header()) + log.Info("ExecutionEngine: Added 
DelayedMessages", "pos", lastMsg+1, "delayed", delayedSeqNum, "block-header", block.Header()) return nil } @@ -441,6 +412,26 @@ func (s *ExecutionEngine) appendBlock(block *types.Block, statedb *state.StateDB return nil } +type MessageResult struct { + BlockHash common.Hash + SendRoot common.Hash +} + +func (s *ExecutionEngine) resultFromHeader(header *types.Header) (*MessageResult, error) { + if header == nil { + return nil, fmt.Errorf("result not found") + } + info, err := types.DeserializeHeaderExtraInformation(header) + if err != nil { + return nil, err + } + return &MessageResult{header.Hash(), info.SendRoot}, nil +} + +func (s *ExecutionEngine) ResultAtPos(pos arbutil.MessageIndex) (*MessageResult, error) { + return s.resultFromHeader(s.bc.GetHeaderByNumber(s.MessageIndexToBlockNumber(pos))) +} + func (s *ExecutionEngine) DigestMessage(num arbutil.MessageIndex, msg *arbostypes.MessageWithMetadata) error { if !s.createBlocksMutex.TryLock() { return errors.New("mutex held") @@ -451,12 +442,12 @@ func (s *ExecutionEngine) DigestMessage(num arbutil.MessageIndex, msg *arbostype func (s *ExecutionEngine) digestMessageWithBlockMutex(num arbutil.MessageIndex, msg *arbostypes.MessageWithMetadata) error { currentHeader := s.bc.CurrentHeader() - expNum, err := s.BlockNumberToMessageCount(currentHeader.Number.Uint64()) + curMsg, err := s.BlockNumberToMessageIndex(currentHeader.Number.Uint64()) if err != nil { return err } - if expNum != num { - return fmt.Errorf("wrong message number in digest got %d expected %d", num, expNum) + if curMsg+1 != num { + return fmt.Errorf("wrong message number in digest got %d expected %d", num, curMsg+1) } startTime := time.Now() @@ -470,10 +461,6 @@ func (s *ExecutionEngine) digestMessageWithBlockMutex(num arbutil.MessageIndex, return err } - if s.validator != nil { - s.validator.NewBlock(block, currentHeader, *msg) - } - if time.Now().After(s.nextScheduledVersionCheck) { s.nextScheduledVersionCheck = time.Now().Add(time.Minute) arbState, err := arbosState.OpenSystemArbosState(statedb, nil, true) diff --git a/arbnode/execution/node.go b/arbnode/execution/node.go index 6dab60042a..5f5fcfb148 100644 --- a/arbnode/execution/node.go +++ b/arbnode/execution/node.go @@ -17,6 +17,7 @@ type ExecutionNode struct { FilterSystem *filters.FilterSystem ArbInterface *ArbInterface ExecEngine *ExecutionEngine + Recorder *BlockRecorder Sequencer *Sequencer // either nil or same as TxPublisher TxPublisher TransactionPublisher } @@ -37,6 +38,7 @@ func CreateExecutionNode( if err != nil { return nil, err } + recorder := NewBlockRecorder(execEngine, chainDB) var txPublisher TransactionPublisher var sequencer *Sequencer seqConfig := seqConfigFetcher() @@ -77,6 +79,7 @@ func CreateExecutionNode( filterSystem, arbInterface, execEngine, + recorder, sequencer, txPublisher, }, nil diff --git a/arbnode/inbox_test.go b/arbnode/inbox_test.go index 1e110fd413..946a8af996 100644 --- a/arbnode/inbox_test.go +++ b/arbnode/inbox_test.go @@ -179,10 +179,17 @@ func TestTransactionStreamer(t *testing.T) { } // Check that state balances are consistent with blockchain's balances - lastBlockNumber := bc.CurrentHeader().Number.Uint64() expectedLastBlockNumber := blockStates[len(blockStates)-1].blockNumber - if lastBlockNumber != expectedLastBlockNumber { - Fail(t, "unexpected block number", lastBlockNumber, "vs", expectedLastBlockNumber) + for i := 0; ; i++ { + lastBlockNumber := bc.CurrentHeader().Number.Uint64() + if lastBlockNumber == expectedLastBlockNumber { + break + } else if lastBlockNumber > 
expectedLastBlockNumber { + Fail(t, "unexpected block number", lastBlockNumber, "vs", expectedLastBlockNumber) + } else if i == 10 { + Fail(t, "timeout waiting for block number", expectedLastBlockNumber, "current", lastBlockNumber) + } + time.Sleep(time.Millisecond * 100) } for _, state := range blockStates { diff --git a/arbnode/inbox_tracker.go b/arbnode/inbox_tracker.go index df4985e69c..980f4e97e8 100644 --- a/arbnode/inbox_tracker.go +++ b/arbnode/inbox_tracker.go @@ -612,18 +612,6 @@ func (t *InboxTracker) AddSequencerBatches(ctx context.Context, client arbutil.L } t.batchMetaMutex.Unlock() - if t.validator != nil { - batchBytes := make([][]byte, 0, len(batches)) - for _, batch := range batches { - msg, err := batch.Serialize(ctx, client) - if err != nil { - return err - } - batchBytes = append(batchBytes, msg) - } - t.validator.ProcessBatches(startPos, batchBytes) - } - if t.txStreamer.broadcastServer != nil && pos > 1 { prevprevbatchmeta, err := t.GetBatchMetadata(pos - 2) if errors.Is(err, AccumulatorNotFoundErr) { diff --git a/arbnode/node.go b/arbnode/node.go index bcdfa9386a..608a7f7a0d 100644 --- a/arbnode/node.go +++ b/arbnode/node.go @@ -650,9 +650,10 @@ func createNodeImpl( l2Config := l2BlockChain.Config() l2ChainId := l2Config.ChainID.Uint64() - var reorgingToBlock *types.Block + //TODO: + // var reorgingToBlock *types.Block if config.Dangerous.ReorgToBlock >= 0 { - reorgingToBlock, err = execution.ReorgToBlock(l2BlockChain, uint64(config.Dangerous.ReorgToBlock)) + _, err = execution.ReorgToBlock(l2BlockChain, uint64(config.Dangerous.ReorgToBlock)) if err != nil { return nil, err } @@ -830,8 +831,7 @@ func createNodeImpl( inboxReader, inboxTracker, txStreamer, - l2BlockChain, - chainDb, + exec.Recorder, rawdb.NewTable(arbDb, blockValidatorPrefix), daReader, &configFetcher.Get().BlockValidator, @@ -850,7 +850,6 @@ func createNodeImpl( statelessBlockValidator, inboxTracker, txStreamer, - reorgingToBlock, func() *staker.BlockValidatorConfig { return &configFetcher.Get().BlockValidator }, fatalErrChan, ) @@ -1165,6 +1164,7 @@ func (n *Node) StopAndWait() { if n.StatelessBlockValidator != nil { n.StatelessBlockValidator.Stop() } + n.Execution.Recorder.OrderlyShutdown() if n.InboxReader != nil && n.InboxReader.Started() { n.InboxReader.StopAndWait() } diff --git a/arbnode/sync_monitor.go b/arbnode/sync_monitor.go index fe1887a14a..fe0371033d 100644 --- a/arbnode/sync_monitor.go +++ b/arbnode/sync_monitor.go @@ -148,8 +148,8 @@ func (s *SyncMonitor) SafeBlockNumber(ctx context.Context) (uint64, error) { if err != nil { return 0, err } - block, err := s.txStreamer.exec.MessageCountToBlockNumber(msg) - return uint64(block), err + block := s.txStreamer.exec.MessageIndexToBlockNumber(msg - 1) + return block, nil } func (s *SyncMonitor) FinalizedBlockNumber(ctx context.Context) (uint64, error) { @@ -160,8 +160,8 @@ func (s *SyncMonitor) FinalizedBlockNumber(ctx context.Context) (uint64, error) if err != nil { return 0, err } - block, err := s.txStreamer.exec.MessageCountToBlockNumber(msg) - return uint64(block), err + block := s.txStreamer.exec.MessageIndexToBlockNumber(msg - 1) + return block, nil } func (s *SyncMonitor) Synced() bool { diff --git a/arbnode/transaction_streamer.go b/arbnode/transaction_streamer.go index 0068f3b855..3853204f83 100644 --- a/arbnode/transaction_streamer.go +++ b/arbnode/transaction_streamer.go @@ -42,6 +42,7 @@ type TransactionStreamer struct { chainConfig *params.ChainConfig exec *execution.ExecutionEngine + validator *staker.BlockValidator db 
ethdb.Database fatalErrChan chan<- error @@ -115,7 +116,13 @@ func uint64ToKey(x uint64) []byte { // TODO: this is needed only for block validator func (s *TransactionStreamer) SetBlockValidator(validator *staker.BlockValidator) { - s.exec.SetBlockValidator(validator) + if s.Started() { + panic("trying to set coordinator after start") + } + if s.validator != nil { + panic("trying to set coordinator when already set") + } + s.validator = validator } func (s *TransactionStreamer) SetSeqCoordinator(coordinator *SeqCoordinator) { @@ -274,6 +281,13 @@ func (s *TransactionStreamer) reorg(batch ethdb.Batch, count arbutil.MessageInde return err } + if s.validator != nil { + err = s.validator.Reorg(s.GetContext(), count) + if err != nil { + return err + } + } + err = deleteStartingAt(s.db, batch, messagePrefix, uint64ToKey(uint64(count))) if err != nil { return err @@ -333,6 +347,21 @@ func (s *TransactionStreamer) GetMessageCount() (arbutil.MessageIndex, error) { return arbutil.MessageIndex(pos), nil } +func (s *TransactionStreamer) GetProcessedMessageCount() (arbutil.MessageIndex, error) { + msgCount, err := s.GetMessageCount() + if err != nil { + return 0, err + } + digestedHead, err := s.exec.HeadMessageNumber() + if err != nil { + return 0, err + } + if msgCount > digestedHead+1 { + return digestedHead + 1, nil + } + return msgCount, nil +} + func (s *TransactionStreamer) AddMessages(pos arbutil.MessageIndex, messagesAreConfirmed bool, messages []arbostypes.MessageWithMetadata) error { return s.AddMessagesAndEndBatch(pos, messagesAreConfirmed, messages, nil) } @@ -817,6 +846,14 @@ func (s *TransactionStreamer) writeMessages(pos arbutil.MessageIndex, messages [ return nil } +// TODO: eventually there will be a table maintained by txStreamer itself +func (s *TransactionStreamer) ResultAtCount(count arbutil.MessageIndex) (*execution.MessageResult, error) { + if count == 0 { + return &execution.MessageResult{}, nil + } + return s.exec.ResultAtPos(count - 1) +} + // return value: true if should be called again func (s *TransactionStreamer) feedNextMsg(ctx context.Context, exec *execution.ExecutionEngine) bool { if ctx.Err() != nil { @@ -850,6 +887,7 @@ func (s *TransactionStreamer) feedNextMsg(ctx context.Context, exec *execution.E log.Info("feedOneMsg failed to send message to execEngine", "err", err, "pos", pos) return false } + return pos+1 < msgCount } diff --git a/staker/block_challenge_backend.go b/staker/block_challenge_backend.go index 9814d17d6f..e545efd23e 100644 --- a/staker/block_challenge_backend.go +++ b/staker/block_challenge_backend.go @@ -11,7 +11,6 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common/math" - "github.com/ethereum/go-ethereum/core" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" "github.com/offchainlabs/nitro/arbutil" @@ -20,14 +19,13 @@ import ( ) type BlockChallengeBackend struct { - bc *core.BlockChain - startBlock int64 + streamer TransactionStreamerInterface + startMsgCount arbutil.MessageIndex startPosition uint64 endPosition uint64 startGs validator.GoGlobalState endGs validator.GoGlobalState inboxTracker InboxTrackerInterface - genesisBlockNumber uint64 tooFarStartsAtPosition uint64 } @@ -36,19 +34,10 @@ var _ ChallengeBackend = (*BlockChallengeBackend)(nil) func NewBlockChallengeBackend( initialState *challengegen.ChallengeManagerInitiatedChallenge, - bc *core.BlockChain, + streamer TransactionStreamerInterface, inboxTracker InboxTrackerInterface, - genesisBlockNumber uint64, ) 
(*BlockChallengeBackend, error) { startGs := validator.GoGlobalStateFromSolidity(initialState.StartState) - startBlockNum := arbutil.MessageCountToBlockNumber(0, genesisBlockNumber) - if startGs.BlockHash != (common.Hash{}) { - startBlock := bc.GetBlockByHash(startGs.BlockHash) - if startBlock == nil { - return nil, fmt.Errorf("failed to find start block %v", startGs.BlockHash) - } - startBlockNum = int64(startBlock.NumberU64()) - } var startMsgCount arbutil.MessageIndex if startGs.Batch > 0 { @@ -59,10 +48,6 @@ func NewBlockChallengeBackend( } } startMsgCount += arbutil.MessageIndex(startGs.PosInBatch) - expectedMsgCount := arbutil.SignedBlockNumberToMessageCount(startBlockNum, genesisBlockNumber) - if startMsgCount != expectedMsgCount { - return nil, fmt.Errorf("start block %v and start message count %v don't correspond", startBlockNum, startMsgCount) - } endGs := validator.GoGlobalStateFromSolidity(initialState.EndState) var endMsgCount arbutil.MessageIndex @@ -76,19 +61,18 @@ func NewBlockChallengeBackend( endMsgCount += arbutil.MessageIndex(endGs.PosInBatch) return &BlockChallengeBackend{ - bc: bc, - startBlock: startBlockNum, + streamer: streamer, + startMsgCount: startMsgCount, startGs: startGs, startPosition: 0, endPosition: math.MaxUint64, endGs: endGs, inboxTracker: inboxTracker, - genesisBlockNumber: genesisBlockNumber, tooFarStartsAtPosition: uint64(endMsgCount - startMsgCount + 1), }, nil } -func (b *BlockChallengeBackend) findBatchFromMessageIndex(msgCount arbutil.MessageIndex) (uint64, error) { +func (b *BlockChallengeBackend) findBatchFromMessageCount(msgCount arbutil.MessageIndex) (uint64, error) { if msgCount == 0 { return 0, nil } @@ -119,12 +103,8 @@ func (b *BlockChallengeBackend) findBatchFromMessageIndex(msgCount arbutil.Messa } } -func (b *BlockChallengeBackend) FindGlobalStateFromHeader(header *types.Header) (validator.GoGlobalState, error) { - if header == nil { - return validator.GoGlobalState{}, nil - } - msgCount := arbutil.BlockNumberToMessageCount(header.Number.Uint64(), b.genesisBlockNumber) - batch, err := b.findBatchFromMessageIndex(msgCount) +func (b *BlockChallengeBackend) FindGlobalStateFromMessageCount(count arbutil.MessageIndex) (validator.GoGlobalState, error) { + batch, err := b.findBatchFromMessageCount(count) if err != nil { return validator.GoGlobalState{}, err } @@ -134,42 +114,35 @@ func (b *BlockChallengeBackend) FindGlobalStateFromHeader(header *types.Header) if err != nil { return validator.GoGlobalState{}, err } - if batchMsgCount > msgCount { + if batchMsgCount > count { return validator.GoGlobalState{}, errors.New("findBatchFromMessageCount returned bad batch") } } - extraInfo, err := types.DeserializeHeaderExtraInformation(header) + res, err := b.streamer.ResultAtCount(count) if err != nil { return validator.GoGlobalState{}, err } return validator.GoGlobalState{ - BlockHash: header.Hash(), - SendRoot: extraInfo.SendRoot, + BlockHash: res.BlockHash, + SendRoot: res.SendRoot, Batch: batch, - PosInBatch: uint64(msgCount - batchMsgCount), + PosInBatch: uint64(count - batchMsgCount), }, nil } const StatusFinished uint8 = 1 const StatusTooFar uint8 = 3 -func (b *BlockChallengeBackend) GetBlockNrAtStep(step uint64) (int64, bool) { - return b.startBlock + int64(step), step >= b.tooFarStartsAtPosition +func (b *BlockChallengeBackend) GetMessageCountAtStep(step uint64) (arbutil.MessageIndex, bool) { + return b.startMsgCount + arbutil.MessageIndex(step), step >= b.tooFarStartsAtPosition } func (b *BlockChallengeBackend) GetInfoAtStep(step uint64) 
(validator.GoGlobalState, uint8, error) { - blockNum, tooFar := b.GetBlockNrAtStep(step) + msgNum, tooFar := b.GetMessageCountAtStep(step) if tooFar { return validator.GoGlobalState{}, StatusTooFar, nil } - var header *types.Header - if blockNum != -1 { - header = b.bc.GetHeaderByNumber(uint64(blockNum)) - if header == nil { - return validator.GoGlobalState{}, 0, fmt.Errorf("failed to get block %v in block challenge", blockNum) - } - } - globalState, err := b.FindGlobalStateFromHeader(header) + globalState, err := b.FindGlobalStateFromMessageCount(msgNum) if err != nil { return validator.GoGlobalState{}, 0, err } diff --git a/staker/block_validator.go b/staker/block_validator.go index ea6af27484..d389d2bc35 100644 --- a/staker/block_validator.go +++ b/staker/block_validator.go @@ -8,17 +8,17 @@ import ( "fmt" "sync" "sync/atomic" + "testing" "time" "github.com/pkg/errors" flag "github.com/spf13/pflag" "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/rlp" - "github.com/offchainlabs/nitro/arbos/arbostypes" "github.com/offchainlabs/nitro/arbutil" + "github.com/offchainlabs/nitro/util/containers" "github.com/offchainlabs/nitro/util/stopwaiter" "github.com/offchainlabs/nitro/validator" ) @@ -27,35 +27,35 @@ type BlockValidator struct { stopwaiter.StopWaiter *StatelessBlockValidator - validations sync.Map - sequencerBatches sync.Map - blockMutex sync.Mutex - batchMutex sync.Mutex - reorgMutex sync.Mutex - reorgsPending int32 // atomic + validations containers.SyncMap[arbutil.MessageIndex, *validationStatus] + reorgMutex sync.RWMutex - lastBlockValidated uint64 // both atomic and behind lastBlockValidatedMutex - lastBlockValidatedHash common.Hash // behind lastBlockValidatedMutex - lastBlockValidatedMutex sync.Mutex - earliestBatchKept uint64 - nextBatchKept uint64 // 1 + the last batch number kept + lastValidGS validator.GoGlobalState + // validInfoPrintTime time.Time TODO: print validated once in a while.. 
+ chainCaughtUp bool - nextBlockToValidate uint64 - lastValidationEntryBlock uint64 // used to delete entries in reorg, protected by blockMutex - lastBlockValidatedUnknown bool - globalPosNextSend GlobalStatePosition + lastCreateBatch []byte + lastCreateBatchMsgCount arbutil.MessageIndex + lastCreateGS validator.GoGlobalState + lastCreateDelayed uint64 - config BlockValidatorConfigFetcher + createdA uint64 + recordSentA uint64 + validatedA uint64 + + // only used by record loop and reorg, not atomic + recordPrepardPos arbutil.MessageIndex + nextRecordPrepared *containers.Promise[arbutil.MessageIndex] + // only used by validation loop and reorg, not atomic + valLoopPos arbutil.MessageIndex - sendValidationsChan chan struct{} - progressChan chan uint64 + config BlockValidatorConfigFetcher - lastHeaderForPrepareState *types.Header + createNodesChan chan struct{} + sendRecordChan chan struct{} + progressValidationsChan chan struct{} - // recentValid holds one recently valid header, to commit it to DB on shutdown - recentValidMutex sync.Mutex - awaitingValidation *types.Header - validHeader *types.Header + testingProgressMadeChan chan struct{} fatalErr chan<- error } @@ -129,13 +129,12 @@ var DefaultBlockValidatorDangerousConfig = BlockValidatorDangerousConfig{ type valStatusField uint32 const ( - Unprepared valStatusField = iota + Created valStatusField = iota RecordSent RecordFailed Prepared + SendingValidation ValidationSent - Failed - Valid ) type validationStatus struct { @@ -145,10 +144,6 @@ type validationStatus struct { Runs []validator.ValidationRun // if status >= ValidationSent } -func (s *validationStatus) setStatus(val valStatusField) { - atomic.StoreUint32(&s.Status, uint32(val)) -} - func (s *validationStatus) getStatus() valStatusField { uintStat := atomic.LoadUint32(&s.Status) return valStatusField(uintStat) @@ -162,26 +157,55 @@ func NewBlockValidator( statelessBlockValidator *StatelessBlockValidator, inbox InboxTrackerInterface, streamer TransactionStreamerInterface, - reorgingToBlock *types.Block, config BlockValidatorConfigFetcher, fatalErr chan<- error, ) (*BlockValidator, error) { validator := &BlockValidator{ StatelessBlockValidator: statelessBlockValidator, - sendValidationsChan: make(chan struct{}, 1), - progressChan: make(chan uint64, 1), + createNodesChan: make(chan struct{}, 1), + sendRecordChan: make(chan struct{}, 1), + progressValidationsChan: make(chan struct{}, 1), config: config, fatalErr: fatalErr, } - err := validator.readLastBlockValidatedDbInfo(reorgingToBlock) - if err != nil { - return nil, err + if !config().Dangerous.ResetBlockValidation { + validated, err := validator.ReadLastValidatedInfo() + if err != nil { + return nil, err + } + if validated != nil { + validator.lastValidGS = validated.GlobalState + } } streamer.SetBlockValidator(validator) inbox.SetBlockValidator(validator) return validator, nil } +func atomicStorePos(addr *uint64, val arbutil.MessageIndex) { + atomic.StoreUint64(addr, uint64(val)) +} + +func atomicLoadPos(addr *uint64) arbutil.MessageIndex { + return arbutil.MessageIndex(atomic.LoadUint64(addr)) +} + +func (v *BlockValidator) created() arbutil.MessageIndex { + return atomicLoadPos(&v.createdA) +} + +func (v *BlockValidator) recordSent() arbutil.MessageIndex { + return atomicLoadPos(&v.recordSentA) +} + +func (v *BlockValidator) validated() arbutil.MessageIndex { + return atomicLoadPos(&v.validatedA) +} + +func (v *BlockValidator) Validated(t *testing.T) arbutil.MessageIndex { + return v.validated() +} + func (v *BlockValidator) 
possiblyFatal(err error) { if v.Stopped() { return @@ -198,135 +222,122 @@ func (v *BlockValidator) possiblyFatal(err error) { } } -func (v *BlockValidator) triggerSendValidations() { +func nonBlockingTriger(channel chan struct{}) { select { - case v.sendValidationsChan <- struct{}{}: + case channel <- struct{}{}: default: } } -func (v *BlockValidator) recentlyValid(header *types.Header) { - v.recentValidMutex.Lock() - defer v.recentValidMutex.Unlock() - if v.awaitingValidation == nil { - return - } - if v.awaitingValidation.Number.Cmp(header.Number) > 0 { - return +// called from NewBlockValidator, doesn't need to catch locks +func (v *BlockValidator) ReadLastValidatedInfo() (*GlobalStateValidatedInfo, error) { + exists, err := v.db.Has(lastGlobalStateValidatedInfoKey) + if err != nil { + return nil, err } - if v.validHeader != nil { - v.recordingDatabase.Dereference(v.validHeader) + var validated GlobalStateValidatedInfo + if !exists { + return nil, nil } - v.validHeader = v.awaitingValidation - v.awaitingValidation = nil -} - -func (v *BlockValidator) recentStateComputed(header *types.Header) { - v.recentValidMutex.Lock() - defer v.recentValidMutex.Unlock() - if v.awaitingValidation != nil { - return - } - _, err := v.recordingDatabase.StateFor(header) + gsBytes, err := v.db.Get(lastGlobalStateValidatedInfoKey) if err != nil { - log.Error("failed to get state for block while validating", "err", err, "blockNum", header.Number, "hash", header.Hash()) - return + return nil, err } - v.awaitingValidation = header -} - -func (v *BlockValidator) recentShutdown() error { - v.recentValidMutex.Lock() - defer v.recentValidMutex.Unlock() - if v.validHeader == nil { - return nil + err = rlp.DecodeBytes(gsBytes, &validated) + if err != nil { + return nil, err } - err := v.recordingDatabase.WriteStateToDatabase(v.validHeader) - v.recordingDatabase.Dereference(v.validHeader) - return err + return &validated, nil } -func (v *BlockValidator) readLastBlockValidatedDbInfo(reorgingToBlock *types.Block) error { - v.lastBlockValidatedMutex.Lock() - defer v.lastBlockValidatedMutex.Unlock() +var ErrGlobalStateNotInChain = errors.New("globalstate not in chain") - exists, err := v.db.Has(lastBlockValidatedInfoKey) +// false if chain not caught up to globalstate +// error is ErrGlobalStateNotInChain if globalstate not in chain (and chain caught up) +func GlobalStateToMsgCount(tracker InboxTrackerInterface, streamer TransactionStreamerInterface, gs validator.GoGlobalState) (bool, arbutil.MessageIndex, error) { + batchCount, err := tracker.GetBatchCount() if err != nil { - return err + return false, 0, err } - - if !exists || v.config().Dangerous.ResetBlockValidation { - // The db contains no validation info; start from the beginning. - // TODO: this skips validating the genesis block. 
- atomic.StoreUint64(&v.lastBlockValidated, v.genesisBlockNum) - genesisBlock := v.blockchain.GetBlockByNumber(v.genesisBlockNum) - if genesisBlock == nil { - return fmt.Errorf("blockchain missing genesis block number %v", v.genesisBlockNum) - } - v.lastBlockValidatedHash = genesisBlock.Hash() - v.nextBlockToValidate = v.genesisBlockNum + 1 - v.globalPosNextSend = GlobalStatePosition{ - BatchNumber: 1, - PosInBatch: 0, + if batchCount <= gs.Batch { + return false, 0, nil + } + var prevBatchMsgCount arbutil.MessageIndex + if gs.Batch > 0 { + prevBatchMsgCount, err = tracker.GetBatchMessageCount(gs.Batch - 1) + if err != nil { + return false, 0, err } - return nil } - - infoBytes, err := v.db.Get(lastBlockValidatedInfoKey) + count := prevBatchMsgCount + arbutil.MessageIndex(gs.PosInBatch) + curBatchMsgCount, err := tracker.GetBatchMessageCount(gs.Batch) if err != nil { - return err + return false, 0, fmt.Errorf("%w: getBatchMsgCount %d batchCount %d", err, gs.Batch, batchCount) } - - var info lastBlockValidatedDbInfo - err = rlp.DecodeBytes(infoBytes, &info) + if curBatchMsgCount < count { + return false, 0, fmt.Errorf("%w: batch %d posInBatch %d, maxPosInBatch %d", ErrGlobalStateNotInChain, gs.Batch, gs.PosInBatch, curBatchMsgCount-prevBatchMsgCount) + } + processed, err := streamer.GetProcessedMessageCount() if err != nil { - return err + return false, 0, err } - - if reorgingToBlock != nil && reorgingToBlock.NumberU64() >= info.BlockNumber { - // Disregard this reorg as it doesn't affect the last validated block - reorgingToBlock = nil + if processed < count { + return false, 0, nil } + res, err := streamer.ResultAtCount(count) + if err != nil { + return false, 0, err + } + if res.BlockHash != gs.BlockHash || res.SendRoot != gs.SendRoot { + return false, 0, fmt.Errorf("%w: count %d hash %v expected %v, sendroot %v expected %v", ErrGlobalStateNotInChain, count, gs.BlockHash, res.BlockHash, gs.SendRoot, res.SendRoot) + } + return true, count, nil +} - if reorgingToBlock == nil { - expectedHash := v.blockchain.GetCanonicalHash(info.BlockNumber) - if expectedHash != info.BlockHash && (expectedHash != common.Hash{}) { - return fmt.Errorf("last validated block %v stored with hash %v, but blockchain has hash %v", info.BlockNumber, info.BlockHash, expectedHash) +func (v *BlockValidator) checkValidatedGSCaughUp(ctx context.Context) error { + if v.chainCaughtUp { + return nil + } + v.reorgMutex.Lock() + defer v.reorgMutex.Unlock() + var count arbutil.MessageIndex + if v.lastValidGS.Batch > 0 { + var caughtUp bool + var err error + caughtUp, count, err = GlobalStateToMsgCount(v.inboxTracker, v.streamer, v.lastValidGS) + if err != nil { + return err + } + if !caughtUp { + return nil } } - - atomic.StoreUint64(&v.lastBlockValidated, info.BlockNumber) - v.lastBlockValidatedHash = info.BlockHash - v.nextBlockToValidate = v.lastBlockValidated + 1 - v.globalPosNextSend = info.AfterPosition - - if reorgingToBlock != nil { - err = v.reorgToBlockImpl(reorgingToBlock.NumberU64(), reorgingToBlock.Hash(), true) + if v.lastValidGS.PosInBatch != 0 { + found, err := v.readLastCreatedBatch(ctx, v.lastValidGS.Batch) if err != nil { return err } + if !found { + return fmt.Errorf("couldn't find batch %d though caught up", v.lastValidGS.Batch) + } } - + v.lastCreateGS = v.lastValidGS + atomicStorePos(&v.createdA, count) + atomicStorePos(&v.recordSentA, count) + atomicStorePos(&v.validatedA, count) + v.chainCaughtUp = true return nil } -func (v *BlockValidator) sendRecord(s *validationStatus, mustDeref bool) error { 
+func (v *BlockValidator) sendRecord(s *validationStatus) error { if !v.Started() { - // this could only be sent by NewBlock, so mustDeref is not sent return nil } - prevHeader := s.Entry.PrevBlockHeader - if !s.replaceStatus(Unprepared, RecordSent) { - if mustDeref { - v.recordingDatabase.Dereference(prevHeader) - } + if !s.replaceStatus(Created, RecordSent) { return errors.Errorf("failed status check for send record. Status: %v", s.getStatus()) } v.LaunchThread(func(ctx context.Context) { - if mustDeref { - defer v.recordingDatabase.Dereference(prevHeader) - } - err := v.ValidationEntryRecord(ctx, s.Entry, true) + err := v.ValidationEntryRecord(ctx, s.Entry) if ctx.Err() != nil { return } @@ -335,83 +346,26 @@ func (v *BlockValidator) sendRecord(s *validationStatus, mustDeref bool) error { log.Error("Error while recording", "err", err, "status", s.getStatus()) return } - v.recentStateComputed(prevHeader) - v.recordingDatabase.Dereference(prevHeader) // removes the reference added by ValidationEntryRecord if !s.replaceStatus(RecordSent, Prepared) { log.Error("Fault trying to update validation with recording", "entry", s.Entry, "status", s.getStatus()) return } - v.triggerSendValidations() + nonBlockingTriger(v.progressValidationsChan) }) return nil } -func (v *BlockValidator) newValidationStatus(prevHeader, header *types.Header, msg *arbostypes.MessageWithMetadata) (*validationStatus, error) { - entry, err := newValidationEntry(prevHeader, header, msg) - if err != nil { - return nil, err - } - status := &validationStatus{ - Status: uint32(Unprepared), - Entry: entry, - } - return status, nil -} - -func (v *BlockValidator) NewBlock(block *types.Block, prevHeader *types.Header, msg arbostypes.MessageWithMetadata) { - v.blockMutex.Lock() - defer v.blockMutex.Unlock() - blockNum := block.NumberU64() - if blockNum < v.lastBlockValidated { - return - } - if v.lastBlockValidatedUnknown { - if block.Hash() == v.lastBlockValidatedHash { - v.lastBlockValidated = blockNum - v.nextBlockToValidate = blockNum + 1 - v.lastBlockValidatedUnknown = false - log.Info("Block building caught up to staker", "blockNr", v.lastBlockValidated, "blockHash", v.lastBlockValidatedHash) - // note: this block is already valid - } - return - } - if v.nextBlockToValidate+v.config().ForwardBlocks <= blockNum { - return - } - status, err := v.newValidationStatus(prevHeader, block.Header(), &msg) - if err != nil { - log.Error("failed creating validation status", "err", err) - return - } - // It's fine to separately load and then store as we have the blockMutex acquired - _, present := v.validations.Load(blockNum) - if present { - return - } - v.validations.Store(blockNum, status) - if v.lastValidationEntryBlock < blockNum { - v.lastValidationEntryBlock = blockNum - } - v.triggerSendValidations() -} - //nolint:gosec func (v *BlockValidator) writeToFile(validationEntry *validationEntry, moduleRoot common.Hash) error { input, err := validationEntry.ToInput() if err != nil { return err } - expOut, err := validationEntry.expectedEnd() - if err != nil { - return err - } - return v.execSpawner.WriteToFile(input, expOut, moduleRoot) + return v.execSpawner.WriteToFile(input, validationEntry.End, moduleRoot) } func (v *BlockValidator) SetCurrentWasmModuleRoot(hash common.Hash) error { - v.blockMutex.Lock() v.moduleMutex.Lock() - defer v.blockMutex.Unlock() defer v.moduleMutex.Unlock() if (hash == common.Hash{}) { @@ -438,489 +392,404 @@ func (v *BlockValidator) SetCurrentWasmModuleRoot(hash common.Hash) error { ) } -var 
ErrValidationCanceled = errors.New("validation of block cancelled") +func (v *BlockValidator) readLastCreatedBatch(ctx context.Context, batchNum uint64) (bool, error) { + batchCount, err := v.inboxTracker.GetBatchCount() + if err != nil { + return false, err + } + if batchCount < batchNum { + return false, nil + } + batchMsgCount, err := v.inboxTracker.GetBatchMessageCount(v.lastCreateGS.Batch) + if err != nil { + return false, err + } + batch, err := v.inboxReader.GetSequencerMessageBytes(ctx, batchNum) + if err != nil { + return false, err + } + v.lastCreateBatch = batch + v.lastCreateBatchMsgCount = batchMsgCount + return true, nil +} -func (v *BlockValidator) sendValidations(ctx context.Context) { - v.reorgMutex.Lock() - defer v.reorgMutex.Unlock() - var batchCount uint64 - wasmRoots := v.GetModuleRootsToValidate() - room := 100 // even if there is more room then that it's fine - for _, spawner := range v.validationSpawners { - here := spawner.Room() / len(wasmRoots) - if here <= 0 { - return - } - if here < room { - room = here - } +func (v *BlockValidator) createNextValidationEntry(ctx context.Context) (bool, error) { + v.reorgMutex.RLock() + defer v.reorgMutex.RUnlock() + pos := v.created() + if pos > v.validated()+arbutil.MessageIndex(v.config().ForwardBlocks) { + return false, nil } - for atomic.LoadInt32(&v.reorgsPending) == 0 { - if batchCount <= v.globalPosNextSend.BatchNumber { - var err error - batchCount, err = v.inboxTracker.GetBatchCount() - if err != nil { - log.Error("validator failed to get message count", "err", err) - return - } - if batchCount <= v.globalPosNextSend.BatchNumber { - return - } - } - seqBatchEntry, haveBatch := v.sequencerBatches.Load(v.globalPosNextSend.BatchNumber) - if !haveBatch && batchCount == v.globalPosNextSend.BatchNumber+1 { - // This is the latest batch. - // Wait a bit to see if the inbox tracker populates this sequencer batch, - // but if it's still missing after this wait, we'll query it from the inbox reader. 
- time.Sleep(time.Second) - seqBatchEntry, haveBatch = v.sequencerBatches.Load(v.globalPosNextSend.BatchNumber) - } - if !haveBatch { - seqMsg, err := v.inboxReader.GetSequencerMessageBytes(ctx, v.globalPosNextSend.BatchNumber) - if err != nil { - log.Error("validator failed to read sequencer message", "err", err) - return - } - v.ProcessBatches(v.globalPosNextSend.BatchNumber, [][]byte{seqMsg}) - seqBatchEntry = seqMsg - } - v.blockMutex.Lock() - if v.lastBlockValidatedUnknown { - firstMsgInBatch := arbutil.MessageIndex(0) - if v.globalPosNextSend.BatchNumber > 0 { - var err error - firstMsgInBatch, err = v.inboxTracker.GetBatchMessageCount(v.globalPosNextSend.BatchNumber - 1) - if err != nil { - v.blockMutex.Unlock() - log.Error("validator couldnt read message count", "err", err) - return - } - } - v.lastBlockValidated = uint64(arbutil.MessageCountToBlockNumber(firstMsgInBatch+arbutil.MessageIndex(v.globalPosNextSend.PosInBatch), v.genesisBlockNum)) - v.nextBlockToValidate = v.lastBlockValidated + 1 - v.lastBlockValidatedUnknown = false - log.Info("Inbox caught up to staker", "blockNr", v.lastBlockValidated, "blockHash", v.lastBlockValidatedHash) - } - v.blockMutex.Unlock() - nextMsg := arbutil.BlockNumberToMessageCount(v.nextBlockToValidate, v.genesisBlockNum) - 1 - // valdationEntries is By blockNumber - entry, found := v.validations.Load(v.nextBlockToValidate) + streamerMsgCount, err := v.streamer.GetProcessedMessageCount() + if err != nil { + return false, err + } + if pos >= streamerMsgCount { + return false, nil + } + msg, err := v.streamer.GetMessage(pos) + if err != nil { + return false, err + } + res, err := v.streamer.ResultAtCount(pos + 1) + if err != nil { + return false, err + } + if v.lastCreateGS.PosInBatch == 0 { + // new batch + found, err := v.readLastCreatedBatch(ctx, v.lastCreateGS.Batch) if !found { - return - } - validationStatus, ok := entry.(*validationStatus) - if !ok || (validationStatus == nil) { - log.Error("bad entry trying to validate batch") - return - } - if validationStatus.getStatus() < Prepared { - return - } - startPos, endPos, err := GlobalStatePositionsFor(v.inboxTracker, nextMsg, v.globalPosNextSend.BatchNumber) - if err != nil { - log.Error("failed calculating position for validation", "err", err, "msg", nextMsg, "batch", v.globalPosNextSend.BatchNumber) - return - } - if startPos != v.globalPosNextSend { - log.Error("inconsistent pos mapping", "msg", nextMsg, "expected", v.globalPosNextSend, "found", startPos) - return + return false, err } - seqMsg, ok := seqBatchEntry.([]byte) - if !ok { - batchNum := validationStatus.Entry.StartPosition.BatchNumber - log.Error("sequencer message bad format", "blockNr", v.nextBlockToValidate, "msgNum", batchNum) - return - } - v.LaunchThread(func(ctx context.Context) { - validationCtx, cancel := context.WithCancel(ctx) - defer cancel() - validationStatus.Cancel = cancel - err := v.ValidationEntryAddSeqMessage(ctx, validationStatus.Entry, startPos, endPos, seqMsg) - if err != nil && validationCtx.Err() == nil { - log.Error("error preparing validation", "err", err) - return - } - input, err := validationStatus.Entry.ToInput() - if err != nil && validationCtx.Err() == nil { - log.Error("error preparing validation", "err", err) - return - } - for _, moduleRoot := range wasmRoots { - for _, spawner := range v.validationSpawners { - run := spawner.Launch(input, moduleRoot) - validationStatus.Runs = append(validationStatus.Runs, run) - } - } - replaced := validationStatus.replaceStatus(Prepared, ValidationSent) - if 
!replaced { - v.possiblyFatal(errors.New("failed to set status")) - } - }) - room-- - v.nextBlockToValidate++ - v.globalPosNextSend = endPos } + endGS := validator.GoGlobalState{ + BlockHash: res.BlockHash, + SendRoot: res.SendRoot, + Batch: v.lastCreateGS.Batch, + PosInBatch: v.lastCreateGS.PosInBatch + 1, + } + if pos == v.lastCreateBatchMsgCount { + endGS.Batch++ + endGS.PosInBatch = 0 + } else if pos > v.lastCreateBatchMsgCount { + return false, fmt.Errorf("illegal batch msg count %d pos %d batch %d", v.lastCreateBatchMsgCount, pos, endGS.Batch) + } + entry, err := newValidationEntry(pos, v.lastCreateGS, endGS, msg, v.lastCreateBatch, v.lastCreateDelayed) + if err != nil { + return false, err + } + status := &validationStatus{ + Status: uint32(Created), + Entry: entry, + } + v.validations.Store(pos, status) + v.lastCreateGS = endGS + atomicStorePos(&v.createdA, pos+1) + v.lastCreateDelayed = msg.DelayedMessagesRead + return true, nil } -func (v *BlockValidator) sendRecords(ctx context.Context) { - v.reorgMutex.Lock() - defer v.reorgMutex.Unlock() - nextRecord := v.nextBlockToValidate - for atomic.LoadInt32(&v.reorgsPending) == 0 { - if nextRecord >= v.nextBlockToValidate+v.config().PrerecordedBlocks { - return - } - entry, found := v.validations.Load(nextRecord) - if !found { - header := v.blockchain.GetHeaderByNumber(nextRecord) - if header == nil { - // This block hasn't been created yet. - return - } - prevHeader := v.blockchain.GetHeaderByHash(header.ParentHash) - if prevHeader == nil && header.ParentHash != (common.Hash{}) { - log.Warn("failed to get prevHeader in block validator", "num", nextRecord-1, "hash", header.ParentHash) - return - } - msgNum := arbutil.BlockNumberToMessageCount(nextRecord, v.genesisBlockNum) - 1 - msg, err := v.streamer.GetMessage(msgNum) - if err != nil { - log.Warn("failed to get message in block validator", "err", err) - return - } - status, err := v.newValidationStatus(prevHeader, header, msg) +func (v *BlockValidator) iterativeValidationEntryCreator(ctx context.Context, ignored struct{}) time.Duration { + moreWork, err := v.createNextValidationEntry(ctx) + if err != nil { + processed, processedErr := v.streamer.GetProcessedMessageCount() + log.Error("error trying to create validation node", "err", err, "created", v.created()+1, "processed", processed, "processedErr", processedErr) + } + if moreWork { + return 0 + } + return v.config().ValidationPoll +} + +func (v *BlockValidator) sendNextRecordPrepare() error { + if v.nextRecordPrepared != nil { + if v.nextRecordPrepared.Ready() { + prepared, err := v.nextRecordPrepared.Current() if err != nil { - log.Warn("failed to create validation status", "err", err) - return + return err } - v.blockMutex.Lock() - entry, found = v.validations.Load(nextRecord) - if !found { - v.validations.Store(nextRecord, status) - entry = status + if prepared > v.recordPrepardPos { + v.recordPrepardPos = prepared } - v.blockMutex.Unlock() + v.nextRecordPrepared = nil + } else { + return nil } - validationStatus, ok := entry.(*validationStatus) - if !ok || (validationStatus == nil) { - log.Error("bad entry trying to send recordings") - return - } - currentStatus := validationStatus.getStatus() - if currentStatus == RecordFailed { - // retry - v.validations.Delete(nextRecord) - v.triggerSendValidations() - return - } - if currentStatus == Unprepared { - prevHeader := validationStatus.Entry.PrevBlockHeader - if prevHeader != nil { - _, err := v.recordingDatabase.GetOrRecreateState(ctx, prevHeader, stateLogFunc) - if err != nil { 
- log.Error("error trying to prepare state for recording", "err", err) - } - // add another reference that will be released by the record thread - _, err = v.recordingDatabase.StateFor(prevHeader) - if err != nil { - log.Error("error trying re-reference state for recording", "err", err) - } - if v.lastHeaderForPrepareState != nil { - v.recordingDatabase.Dereference(v.lastHeaderForPrepareState) - } - v.lastHeaderForPrepareState = prevHeader - } - err := v.sendRecord(validationStatus, true) - if err != nil { - log.Error("error trying to send preimage recording", "err", err) - } - } - nextRecord++ } + prepareCount := v.validated() + arbutil.MessageIndex(v.config().PrerecordedBlocks) + created := v.created() + if prepareCount > created { + prepareCount = created + } + if v.recordPrepardPos+2 > prepareCount { + return nil + } + nextPromise := containers.NewPromise[arbutil.MessageIndex]() + v.LaunchThread(func(ctx context.Context) { + err := v.recorder.PrepareForRecord(ctx, v.recordPrepardPos+1, prepareCount-1) + if err != nil { + nextPromise.ProduceError(err) + } else { + nextPromise.Produce(prepareCount - 1) + nonBlockingTriger(v.sendRecordChan) + } + }) + v.nextRecordPrepared = &nextPromise + return nil } -func (v *BlockValidator) writeLastValidatedToDb(blockNumber uint64, blockHash common.Hash, endPos GlobalStatePosition) error { - info := lastBlockValidatedDbInfo{ - BlockNumber: blockNumber, - BlockHash: blockHash, - AfterPosition: endPos, +func (v *BlockValidator) sendNextRecordRequest(ctx context.Context) (bool, error) { + v.reorgMutex.RLock() + defer v.reorgMutex.RUnlock() + err := v.sendNextRecordPrepare() + if err != nil { + return false, err + } + pos := v.recordSent() + if pos > v.recordPrepardPos { + return false, nil + } + validationStatus, found := v.validations.Load(pos) + if !found { + return false, fmt.Errorf("not found entry for pos %d", pos) + } + currentStatus := validationStatus.getStatus() + if currentStatus != Created { + return false, fmt.Errorf("bad status trying to send recordings for pos %d status: %v", pos, currentStatus) } - encodedInfo, err := rlp.EncodeToBytes(info) + err = v.sendRecord(validationStatus) if err != nil { - return err + return false, err } - err = v.db.Put(lastBlockValidatedInfoKey, encodedInfo) + atomicStorePos(&v.recordSentA, pos+1) + return true, nil +} + +func (v *BlockValidator) iterativeValidationEntryRecorder(ctx context.Context, ignored struct{}) time.Duration { + moreWork, err := v.sendNextRecordRequest(ctx) if err != nil { - return err + log.Error("error trying to record for validation node", "err", err) } - return nil + if moreWork { + return 0 + } + return v.config().ValidationPoll } -func (v *BlockValidator) progressValidated() { - v.reorgMutex.Lock() - defer v.reorgMutex.Unlock() - for atomic.LoadInt32(&v.reorgsPending) == 0 { - // Reads from blocksValidated can be non-atomic as all writes hold reorgMutex - checkingBlock := v.lastBlockValidated + 1 - entry, found := v.validations.Load(checkingBlock) - if !found { - return - } - validationStatus, ok := entry.(*validationStatus) - if !ok || (validationStatus == nil) { - log.Error("bad entry trying to advance validated counter") - return +// return val: +// *MessageIndex - pointer to bad entry if there is one (requires reorg) +func (v *BlockValidator) advanceValidations(ctx context.Context) (*arbutil.MessageIndex, error) { + v.reorgMutex.RLock() + defer v.reorgMutex.RUnlock() + + wasmRoots := v.GetModuleRootsToValidate() + room := 100 // even if there is more room then that it's fine + 
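
sendNextRecordPrepare above keeps at most one PrepareForRecord call in flight and lets the polling worker consume its result without blocking. Distilled into a standalone sketch (the Ready/Current/Produce/ProduceError semantics of containers.Promise are inferred from the call sites in this patch; the type itself, the import path, and everything else here are assumptions):

package staker

import (
	"context"

	"github.com/offchainlabs/nitro/arbutil"
	"github.com/offchainlabs/nitro/util/containers"
)

// prefetcher keeps a single asynchronous preparation outstanding and remembers
// the highest position that has been prepared so far.
type prefetcher struct {
	pending  *containers.Promise[arbutil.MessageIndex]
	prepared arbutil.MessageIndex
}

// poll is meant to be called from an iterative worker (compare ValidationPoll);
// launch mirrors StopWaiter.LaunchThread, prepare does the slow work.
func (p *prefetcher) poll(
	launch func(func(context.Context)),
	prepare func(context.Context) (arbutil.MessageIndex, error),
) error {
	if p.pending != nil {
		if !p.pending.Ready() {
			return nil // still running, check again on the next poll
		}
		pos, err := p.pending.Current()
		p.pending = nil
		if err != nil {
			return err
		}
		if pos > p.prepared {
			p.prepared = pos
		}
	}
	promise := containers.NewPromise[arbutil.MessageIndex]()
	launch(func(ctx context.Context) {
		if pos, err := prepare(ctx); err != nil {
			promise.ProduceError(err) // surfaced by Current() on a later poll
		} else {
			promise.Produce(pos)
		}
	})
	p.pending = &promise
	return nil
}
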
for _, spawner := range v.validationSpawners { + here := spawner.Room() / len(wasmRoots) + if here <= 0 { + room = 0 } - if validationStatus.getStatus() < ValidationSent { - return + if here < room { + room = here } - validationEntry := validationStatus.Entry - if validationEntry.BlockNumber != checkingBlock { - log.Error("bad block number for validation entry", "expected", checkingBlock, "found", validationEntry.BlockNumber) - return + } + pos := v.validated() - 1 // to reverse the first +1 in the loop +validatiosLoop: + for { + if ctx.Err() != nil { + return nil, ctx.Err() } - // It's safe to read lastBlockValidatedHash without the lastBlockValidatedMutex as we have the reorgMutex - if v.lastBlockValidatedHash != validationEntry.PrevBlockHash { - log.Error("lastBlockValidatedHash is %v but validationEntry has prevBlockHash %v for block number %v", v.lastBlockValidatedHash, validationEntry.PrevBlockHash, v.lastBlockValidated) - return + v.valLoopPos = pos + 1 + v.reorgMutex.RUnlock() + v.reorgMutex.RLock() + pos = v.valLoopPos + if pos > v.recordSent() { + return nil, nil } - expectedEnd, err := validationEntry.expectedEnd() - if err != nil { - v.possiblyFatal(err) - return + validationStatus, found := v.validations.Load(pos) + if !found { + return nil, fmt.Errorf("not found entry for pos %d", pos) } - for _, run := range validationStatus.Runs { - if !run.Ready() { - return + currentStatus := validationStatus.getStatus() + if currentStatus == RecordFailed { + // retry + log.Warn("Recording for validation failed, retrying..", "pos", pos) + return &pos, nil + } + if currentStatus == ValidationSent && pos == v.validated() { + if validationStatus.Entry.Start != v.lastValidGS { + log.Warn("Validation entry has wrong start state", "pos", pos, "start", validationStatus.Entry.Start, "expected", v.lastValidGS) + validationStatus.Cancel() + return &pos, nil } - runEnd, err := run.Current() - if err == nil && runEnd != expectedEnd { - err = fmt.Errorf("validation failed: expected %v got %v", expectedEnd, runEnd) - writeErr := v.writeToFile(validationEntry, run.WasmModuleRoot()) - if writeErr != nil { - log.Error("failed to write file", "err", err) + var wasmRoots []common.Hash + for _, run := range validationStatus.Runs { + if !run.Ready() { + continue validatiosLoop + } + wasmRoots = append(wasmRoots, run.WasmModuleRoot()) + runEnd, err := run.Current() + if err == nil && runEnd != validationStatus.Entry.End { + err = fmt.Errorf("validation failed: expected %v got %v", validationStatus.Entry.End, runEnd) + writeErr := v.writeToFile(validationStatus.Entry, run.WasmModuleRoot()) + if writeErr != nil { + log.Warn("failed to write debug results file", "err", err) + } } - v.possiblyFatal(err) + if err != nil { + v.possiblyFatal(err) + return &pos, nil // if not fatal - retry + } + } + for _, run := range validationStatus.Runs { + run.Close() } + v.lastValidGS = validationStatus.Entry.End + go v.recorder.MarkValid(pos, v.lastValidGS.BlockHash) + atomicStorePos(&v.validatedA, pos+1) + nonBlockingTriger(v.createNodesChan) + nonBlockingTriger(v.sendRecordChan) + if v.testingProgressMadeChan != nil { + nonBlockingTriger(v.testingProgressMadeChan) + } + err := v.writeLastValidatedToDb(validationStatus.Entry.End, wasmRoots) if err != nil { - v.possiblyFatal(err) - validationStatus.setStatus(Failed) - return + log.Error("failed writing new validated to database", "pos", pos, "err", err) } + continue } - for _, run := range validationStatus.Runs { - run.Close() + if room == 0 { + return nil, nil } - 
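
The getStatus/replaceStatus calls and the Created, RecordFailed, Prepared, SendingValidation and ValidationSent values used by advanceValidations above suggest a small per-entry state machine driven by atomic compare-and-swap. Its definition is outside this hunk; a sketch of the assumed shape, where the constant names come from this patch but the type name, ordering and field comments are guesses:

package staker

import (
	"sync/atomic"

	"github.com/offchainlabs/nitro/validator"
)

// Assumed enum behind the uint32(Created) casts seen above. There may be
// additional intermediate states not visible in this section.
type validationStatusField uint32

const (
	Created validationStatusField = iota
	RecordFailed
	Prepared
	SendingValidation
	ValidationSent
)

type validationStatus struct {
	Status uint32                    // atomic; holds a validationStatusField value
	Cancel func()                    // non-nil while a validation run is in flight
	Entry  *validationEntry          // defined in stateless_block_validator.go
	Runs   []validator.ValidationRun // one run per (wasm root, spawner) pair
}

func (s *validationStatus) getStatus() validationStatusField {
	return validationStatusField(atomic.LoadUint32(&s.Status))
}

// replaceStatus lets concurrent workers hand an entry to each other exactly once.
func (s *validationStatus) replaceStatus(old, target validationStatusField) bool {
	return atomic.CompareAndSwapUint32(&s.Status, uint32(old), uint32(target))
}
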
validationStatus.replaceStatus(ValidationSent, Valid) - v.triggerSendValidations() - earliestBatchKept := atomic.LoadUint64(&v.earliestBatchKept) - seqMsgNr := validationEntry.StartPosition.BatchNumber - if earliestBatchKept < seqMsgNr { - for batch := earliestBatchKept; batch < seqMsgNr; batch++ { - v.sequencerBatches.Delete(batch) + if currentStatus == Prepared { + replaced := validationStatus.replaceStatus(Prepared, SendingValidation) + if !replaced { + v.possiblyFatal(errors.New("failed to set status")) } - atomic.StoreUint64(&v.earliestBatchKept, seqMsgNr) + v.LaunchThread(func(ctx context.Context) { + validationCtx, cancel := context.WithCancel(ctx) + defer cancel() + validationStatus.Cancel = cancel + input, err := validationStatus.Entry.ToInput() + if err != nil && validationCtx.Err() == nil { + v.possiblyFatal(fmt.Errorf("%w: error preparing validation", err)) + return + } + var runs []validator.ValidationRun + for _, moduleRoot := range wasmRoots { + for _, spawner := range v.validationSpawners { + run := spawner.Launch(input, moduleRoot) + runs = append(runs, run) + } + } + validationStatus.Runs = runs + replaced := validationStatus.replaceStatus(SendingValidation, ValidationSent) + if !replaced { + v.possiblyFatal(errors.New("failed to set status to ValidationSent")) + } + // validationStatus might be removed from under us + // trigger validation progress when done + for _, run := range runs { + _, err := run.Await(ctx) + if err != nil { + return + } + } + nonBlockingTriger(v.progressValidationsChan) + }) + room-- } + } +} - v.lastBlockValidatedMutex.Lock() - atomic.StoreUint64(&v.lastBlockValidated, checkingBlock) - v.lastBlockValidatedHash = validationEntry.BlockHash - v.lastBlockValidatedMutex.Unlock() - v.recentlyValid(validationEntry.BlockHeader) - - v.validations.Delete(checkingBlock) - select { - case v.progressChan <- checkingBlock: - default: - } - err = v.writeLastValidatedToDb(validationEntry.BlockNumber, validationEntry.BlockHash, validationEntry.EndPosition) +func (v *BlockValidator) iterativeValidationProgress(ctx context.Context, ignored struct{}) time.Duration { + reorg, err := v.advanceValidations(ctx) + if err != nil { + log.Error("error trying to record for validation node", "err", err) + } else if reorg != nil { + err := v.Reorg(ctx, *reorg) if err != nil { - log.Error("failed to write validated entry to database", "err", err) + log.Error("error trying to rorg validation", "pos", *reorg-1, "err", err) } } + return v.config().ValidationPoll +} + +var ErrValidationCanceled = errors.New("validation of block cancelled") + +func (v *BlockValidator) writeLastValidatedToDb(gs validator.GoGlobalState, wasmRoots []common.Hash) error { + info := GlobalStateValidatedInfo{ + GlobalState: gs, + WasmRoots: wasmRoots, + } + encoded, err := rlp.EncodeToBytes(info) + if err != nil { + return err + } + err = v.db.Put(lastGlobalStateValidatedInfoKey, encoded) + if err != nil { + return err + } + return nil } func (v *BlockValidator) AssumeValid(globalState validator.GoGlobalState) error { if v.Started() { return errors.Errorf("cannot handle AssumeValid while running") } - v.lastBlockValidatedMutex.Lock() - defer v.lastBlockValidatedMutex.Unlock() // don't do anything if we already validated past that - if v.globalPosNextSend.BatchNumber > globalState.Batch { + if v.lastValidGS.Batch > globalState.Batch { return nil } - if v.globalPosNextSend.BatchNumber == globalState.Batch && v.globalPosNextSend.PosInBatch > globalState.PosInBatch { + if v.lastValidGS.Batch == 
globalState.Batch && v.lastValidGS.PosInBatch > globalState.PosInBatch { return nil } - block := v.blockchain.GetBlockByHash(globalState.BlockHash) - if block == nil { - v.lastBlockValidatedUnknown = true - } else { - v.lastBlockValidated = block.NumberU64() - v.nextBlockToValidate = v.lastBlockValidated + 1 - } - v.lastBlockValidatedHash = globalState.BlockHash - v.globalPosNextSend = GlobalStatePosition{ - BatchNumber: globalState.Batch, - PosInBatch: globalState.PosInBatch, - } + v.lastValidGS = globalState return nil } -func (v *BlockValidator) LastBlockValidated() uint64 { - return atomic.LoadUint64(&v.lastBlockValidated) -} - -func (v *BlockValidator) LastBlockValidatedAndHash() (blockNumber uint64, blockHash common.Hash, wasmModuleRoots []common.Hash) { - v.lastBlockValidatedMutex.Lock() - blockValidated := v.lastBlockValidated - blockValidatedHash := v.lastBlockValidatedHash - v.lastBlockValidatedMutex.Unlock() - - // things can be removed from, but not added to, moduleRootsToValidate. By taking root hashes fter the block we know result is valid - moduleRootsValidated := v.GetModuleRootsToValidate() - - return blockValidated, blockValidatedHash, moduleRootsValidated -} - // Because batches and blocks are handled at separate layers in the node, // and because block generation from messages is asynchronous, // this call is different than ReorgToBlock, which is currently called later. func (v *BlockValidator) ReorgToBatchCount(count uint64) { - v.batchMutex.Lock() - defer v.batchMutex.Unlock() - v.reorgToBatchCountImpl(count) -} - -func (v *BlockValidator) reorgToBatchCountImpl(count uint64) { - localBatchCount := v.nextBatchKept - if localBatchCount < count { - return - } - for i := count; i < localBatchCount; i++ { - v.sequencerBatches.Delete(i) - } - v.nextBatchKept = count -} - -func (v *BlockValidator) ProcessBatches(pos uint64, batches [][]byte) { - v.batchMutex.Lock() - defer v.batchMutex.Unlock() - - v.reorgToBatchCountImpl(pos) - - // Attempt to fill in earliestBatchKept if it's empty - atomic.CompareAndSwapUint64(&v.earliestBatchKept, 0, pos) - - for i, msg := range batches { - v.sequencerBatches.Store(pos+uint64(i), msg) - } - v.nextBatchKept = pos + uint64(len(batches)) - v.triggerSendValidations() + // a normal reorg will be called if necessary - do nothing } - -func (v *BlockValidator) ReorgToBlock(blockNum uint64, blockHash common.Hash) error { - v.blockMutex.Lock() - defer v.blockMutex.Unlock() - - atomic.AddInt32(&v.reorgsPending, 1) +func (v *BlockValidator) Reorg(ctx context.Context, count arbutil.MessageIndex) error { v.reorgMutex.Lock() defer v.reorgMutex.Unlock() - atomic.AddInt32(&v.reorgsPending, -1) - - if blockNum < v.lastValidationEntryBlock { - log.Warn("block validator processing reorg", "blockNum", blockNum) - err := v.reorgToBlockImpl(blockNum, blockHash, false) - if err != nil { - return fmt.Errorf("block validator reorg failed: %w", err) - } + if count == 0 { + return errors.New("cannot reorg out genesis") } - - return nil -} - -func (v *BlockValidator) reorgToBlockImpl(blockNum uint64, blockHash common.Hash, hasLastValidatedMutex bool) error { - for b := blockNum + 1; b <= v.lastValidationEntryBlock; b++ { - entry, found := v.validations.Load(b) - if !found { - continue - } - v.validations.Delete(b) - - validationStatus, ok := entry.(*validationStatus) - if !ok || (validationStatus == nil) { - log.Error("bad entry trying to reorg block validator") - continue - } - log.Debug("canceling validation due to reorg", "block", b) - if validationStatus.Cancel 
!= nil { - validationStatus.Cancel() - } + if !v.chainCaughtUp { + return nil } - v.lastValidationEntryBlock = blockNum - if v.nextBlockToValidate <= blockNum+1 { + if v.created() < count { return nil } - msgIndex := arbutil.BlockNumberToMessageCount(blockNum, v.genesisBlockNum) - 1 - batchCount, err := v.inboxTracker.GetBatchCount() + _, endPosition, err := v.GlobalStatePositionsAtCount(count) if err != nil { + v.possiblyFatal(err) return err } - batch, err := FindBatchContainingMessageIndex(v.inboxTracker, msgIndex, batchCount) + res, err := v.streamer.ResultAtCount(count) if err != nil { + v.possiblyFatal(err) return err } - if batch >= batchCount { - // This reorg is past the latest batch. - // Attempt to recover by loading a next validation state at the start of the next batch. - v.globalPosNextSend = GlobalStatePosition{ - BatchNumber: batch, - PosInBatch: 0, - } - msgCount, err := v.inboxTracker.GetBatchMessageCount(batch - 1) + if endPosition.PosInBatch != 0 { + found, err := v.readLastCreatedBatch(ctx, endPosition.BatchNumber) if err != nil { return err } - nextBlockSigned := arbutil.MessageCountToBlockNumber(msgCount, v.genesisBlockNum) + 1 - if nextBlockSigned <= 0 { - return errors.New("reorg past genesis block") - } - blockNum = uint64(nextBlockSigned) - 1 - block := v.blockchain.GetBlockByNumber(blockNum) - if block == nil { - return fmt.Errorf("failed to get end of batch block %v", blockNum) + if !found { + return fmt.Errorf("couldn't find batch during reorg num %d", endPosition.BatchNumber) } - blockHash = block.Hash() - v.lastValidationEntryBlock = blockNum - } else { - _, v.globalPosNextSend, err = GlobalStatePositionsFor(v.inboxTracker, msgIndex, batch) - if err != nil { - return err + } + for iPos := count; iPos < v.created(); iPos++ { + status, found := v.validations.Load(iPos) + if found && status != nil && status.Cancel != nil { + status.Cancel() } + v.validations.Delete(iPos) } - if v.nextBlockToValidate > blockNum+1 { - v.nextBlockToValidate = blockNum + 1 + v.lastCreateGS = buildGlobalState(*res, endPosition) + countUint64 := uint64(count) + v.createdA = countUint64 + // under the reorg mutex we don't need atomic access + if v.recordSentA > countUint64 { + v.recordSentA = countUint64 } - - if v.lastBlockValidated > blockNum { - if !hasLastValidatedMutex { - v.lastBlockValidatedMutex.Lock() - } - atomic.StoreUint64(&v.lastBlockValidated, blockNum) - v.lastBlockValidatedHash = blockHash - if !hasLastValidatedMutex { - v.lastBlockValidatedMutex.Unlock() - } - - err = v.writeLastValidatedToDb(blockNum, blockHash, v.globalPosNextSend) + if v.validatedA > countUint64 { + v.validatedA = countUint64 + v.lastValidGS = v.lastCreateGS + err := v.writeLastValidatedToDb(v.lastValidGS, []common.Hash{}) // we don't know which wasm roots were validated if err != nil { - return err + log.Error("failed writing valid state after reorg", "err", err) } } - + nonBlockingTriger(v.createNodesChan) return nil } @@ -949,77 +818,68 @@ func (v *BlockValidator) Initialize() error { return nil } -func (v *BlockValidator) Start(ctxIn context.Context) error { - v.StopWaiter.Start(ctxIn, v) - err := stopwaiter.CallIterativelyWith[struct{}](&v.StopWaiterSafe, - func(ctx context.Context, unused struct{}) time.Duration { - v.sendRecords(ctx) - v.sendValidations(ctx) - return v.config().ValidationPoll - }, - v.sendValidationsChan) +func (v *BlockValidator) LaunchWorkthreadsWhenCaughtUp(ctx context.Context) { + for { + err := v.checkValidatedGSCaughUp(ctx) + if err != nil { + log.Error("validator got 
error waiting for chain to catch up", "err", err) + } + if v.chainCaughtUp { + break + } + select { + case <-ctx.Done(): + return + case <-time.After(v.config().ValidationPoll): + } + } + err := stopwaiter.CallIterativelyWith[struct{}](&v.StopWaiterSafe, v.iterativeValidationEntryCreator, v.createNodesChan) if err != nil { - return err + v.possiblyFatal(err) } - v.CallIteratively(func(ctx context.Context) time.Duration { - v.progressValidated() - return v.config().ValidationPoll - }) - lastValid := uint64(0) - v.CallIteratively(func(ctx context.Context) time.Duration { - newValid, validHash, wasmModuleRoots := v.LastBlockValidatedAndHash() - if newValid != lastValid { - validHeader := v.blockchain.GetHeader(validHash, newValid) - if validHeader == nil { - foundHeader := v.blockchain.GetHeaderByNumber(newValid) - foundHash := common.Hash{} - if foundHeader != nil { - foundHash = foundHeader.Hash() - } - log.Warn("last valid block not in blockchain", "blockNum", newValid, "validatedBlockHash", validHash, "found-hash", foundHash) - } else { - validTimestamp := time.Unix(int64(validHeader.Time), 0) - log.Info("Validated blocks", "blockNum", newValid, "hash", validHash, - "timestamp", validTimestamp, "age", time.Since(validTimestamp), "wasm", wasmModuleRoots) - } - lastValid = newValid - } - return time.Second - }) + err = stopwaiter.CallIterativelyWith[struct{}](&v.StopWaiterSafe, v.iterativeValidationEntryRecorder, v.sendRecordChan) + if err != nil { + v.possiblyFatal(err) + } + err = stopwaiter.CallIterativelyWith[struct{}](&v.StopWaiterSafe, v.iterativeValidationProgress, v.progressValidationsChan) + if err != nil { + v.possiblyFatal(err) + } +} + +func (v *BlockValidator) Start(ctxIn context.Context) error { + v.StopWaiter.Start(ctxIn, v) + v.LaunchThread(v.LaunchWorkthreadsWhenCaughtUp) return nil } func (v *BlockValidator) StopAndWait() { v.StopWaiter.StopAndWait() - err := v.recentShutdown() - if err != nil { - log.Error("error storing valid state", "err", err) - } } -// WaitForBlock can only be used from One thread -func (v *BlockValidator) WaitForBlock(ctx context.Context, blockNumber uint64, timeout time.Duration) bool { +// WaitForPos can only be used from One thread +func (v *BlockValidator) WaitForPos(t *testing.T, ctx context.Context, pos arbutil.MessageIndex, timeout time.Duration) bool { + trigerchan := make(chan struct{}) + v.testingProgressMadeChan = trigerchan timer := time.NewTimer(timeout) defer timer.Stop() + lastLoop := false for { - if atomic.LoadUint64(&v.lastBlockValidated) >= blockNumber { + if pos >= v.validated() { return true } + if lastLoop { + return false + } select { case <-timer.C: - if atomic.LoadUint64(&v.lastBlockValidated) >= blockNumber { + lastLoop = true + case <-trigerchan: + if pos+1 >= v.validated() { return true } - return false - case block, ok := <-v.progressChan: - if block >= blockNumber { - return true - } - if !ok { - return false - } case <-ctx.Done(): - return false + lastLoop = true } } } diff --git a/staker/block_validator_schema.go b/staker/block_validator_schema.go index 3f1d1ae6d6..6939dd05b2 100644 --- a/staker/block_validator_schema.go +++ b/staker/block_validator_schema.go @@ -3,14 +3,27 @@ package staker -import "github.com/ethereum/go-ethereum/common" +import ( + "github.com/ethereum/go-ethereum/common" + "github.com/offchainlabs/nitro/validator" +) + +// Todo: we could create an upgrade scheme for moving from lastMessageValidated to lastBlockValidated +// not a must, since even without this index, we'll start validation from last 
assertion made +// the other option is to remove lastBlockValidated* from code + +// type legacyLastBlockValidatedDbInfo struct { +// BlockNumber uint64 +// BlockHash common.Hash +// AfterPosition GlobalStatePosition +// } -type lastBlockValidatedDbInfo struct { - BlockNumber uint64 - BlockHash common.Hash - AfterPosition GlobalStatePosition +type GlobalStateValidatedInfo struct { + GlobalState validator.GoGlobalState + WasmRoots []common.Hash } var ( - lastBlockValidatedInfoKey = []byte("_lastBlockValidatedInfo") // contains a rlp encoded lastBlockValidatedDbInfo + lastGlobalStateValidatedInfoKey = []byte("_lastGlobalStateValidatedInfo") // contains a rlp encoded lastBlockValidatedDbInfo + // legacyLastBlockValidatedInfoKey = []byte("_lastBlockValidatedInfo") // contains a rlp encoded lastBlockValidatedDbInfo ) diff --git a/staker/challenge_manager.go b/staker/challenge_manager.go index c125566635..ebfaa6e1ea 100644 --- a/staker/challenge_manager.go +++ b/staker/challenge_manager.go @@ -14,10 +14,10 @@ import ( "github.com/ethereum/go-ethereum/accounts/abi/bind" "github.com/ethereum/go-ethereum/accounts/abi/bind/backends" "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/core" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/rpc" + "github.com/offchainlabs/nitro/arbutil" "github.com/offchainlabs/nitro/solgen/go/challengegen" "github.com/offchainlabs/nitro/validator" ) @@ -70,7 +70,7 @@ type ChallengeManager struct { validator *StatelessBlockValidator wasmModuleRoot common.Hash - initialMachineBlockNr int64 + initialMachineMessageCount arbutil.MessageIndex // nil until working on execution challenge executionChallengeBackend *ExecutionChallengeBackend @@ -85,8 +85,6 @@ func NewChallengeManager( fromAddr common.Address, challengeManagerAddr common.Address, challengeIndex uint64, - l2blockChain *core.BlockChain, - inboxTracker InboxTrackerInterface, validator *StatelessBlockValidator, startL1Block uint64, confirmationBlocks int64, @@ -124,12 +122,10 @@ func NewChallengeManager( return nil, fmt.Errorf("error getting challenge %v info: %w", challengeIndex, err) } - genesisBlockNum := l2blockChain.Config().ArbitrumChainParams.GenesisBlockNum backend, err := NewBlockChallengeBackend( parsedLog, - l2blockChain, - inboxTracker, - genesisBlockNum, + validator.streamer, + validator.inboxTracker, ) if err != nil { return nil, fmt.Errorf("error creating block challenge backend for challenge %v: %w", challengeIndex, err) @@ -426,8 +422,8 @@ func (m *ChallengeManager) LoadExecChallengeIfExists(ctx context.Context) error if err != nil { return fmt.Errorf("error parsing ExecutionChallengeBegun event of challenge %v: %w", m.challengeIndex, err) } - blockNum, tooFar := m.blockChallengeBackend.GetBlockNrAtStep(ev.BlockSteps.Uint64()) - return m.createExecutionBackend(ctx, uint64(blockNum), tooFar) + count, tooFar := m.blockChallengeBackend.GetMessageCountAtStep(ev.BlockSteps.Uint64()) + return m.createExecutionBackend(ctx, count, tooFar) } func (m *ChallengeManager) IssueOneStepProof( @@ -453,37 +449,37 @@ func (m *ChallengeManager) IssueOneStepProof( ) } -func (m *ChallengeManager) createExecutionBackend(ctx context.Context, blockNum uint64, tooFar bool) error { +// count is for the initial machine, which also means it's the position of the challenged machine +func (m *ChallengeManager) createExecutionBackend(ctx context.Context, initialCount arbutil.MessageIndex, tooFar bool) error { // Get the next message and 
block header, and record the full block creation - if m.initialMachineBlockNr == int64(blockNum) && m.executionChallengeBackend != nil { + if m.initialMachineMessageCount == initialCount && m.executionChallengeBackend != nil { return nil } m.executionChallengeBackend = nil - nextHeader := m.blockChallengeBackend.bc.GetHeaderByNumber(uint64(blockNum + 1)) - if nextHeader == nil { - return fmt.Errorf("next block header %v after challenge point unknown", blockNum+1) + if initialCount == 0 { + return errors.New("cannot validate before genesis block") } - entry, err := m.validator.CreateReadyValidationEntry(ctx, nextHeader) + entry, err := m.validator.CreateReadyValidationEntry(ctx, initialCount) if err != nil { - return fmt.Errorf("error creating validation entry for challenge %v block %v for execution challenge: %w", m.challengeIndex, blockNum, err) + return fmt.Errorf("error creating validation entry for challenge %v msg %v for execution challenge: %w", m.challengeIndex, initialCount, err) } input, err := entry.ToInput() if err != nil { - return fmt.Errorf("error getting validation entry input of challenge %v block %v: %w", m.challengeIndex, blockNum, err) + return fmt.Errorf("error getting validation entry input of challenge %v msg %v: %w", m.challengeIndex, initialCount, err) } if tooFar { input.BatchInfo = []validator.BatchInfo{} } execRun, err := m.validator.execSpawner.CreateExecutionRun(m.wasmModuleRoot, input) if err != nil { - return fmt.Errorf("error creating execution backend for block %v: %w", blockNum, err) + return fmt.Errorf("error creating execution backend for msg %v: %w", initialCount, err) } backend, err := NewExecutionChallengeBackend(execRun) if err != nil { return err } m.executionChallengeBackend = backend - m.initialMachineBlockNr = int64(blockNum) + m.initialMachineMessageCount = initialCount return nil } @@ -531,12 +527,12 @@ func (m *ChallengeManager) Act(ctx context.Context) (*types.Transaction, error) nextMovePos, ) } - blockNum, tooFar := m.blockChallengeBackend.GetBlockNrAtStep(uint64(nextMovePos)) + initialCount, tooFar := m.blockChallengeBackend.GetMessageCountAtStep(uint64(nextMovePos)) expectedState, expectedStatus, err := m.blockChallengeBackend.GetInfoAtStep(uint64(nextMovePos + 1)) if err != nil { return nil, fmt.Errorf("error getting info from block challenge backend: %w", err) } - err = m.createExecutionBackend(ctx, uint64(blockNum), tooFar) + err = m.createExecutionBackend(ctx, initialCount, tooFar) if err != nil { return nil, fmt.Errorf("error creating execution backend: %w", err) } @@ -545,14 +541,14 @@ func (m *ChallengeManager) Act(ctx context.Context) (*types.Transaction, error) return nil, fmt.Errorf("error getting execution challenge final state: %w", err) } if expectedStatus != computedStatus { - return nil, fmt.Errorf("after block %v expected status %v but got %v", blockNum, expectedStatus, computedStatus) + return nil, fmt.Errorf("after block %v expected status %v but got %v", initialCount, expectedStatus, computedStatus) } if computedStatus == StatusFinished { if computedState != expectedState { - return nil, fmt.Errorf("after block %v expected global state %v but got %v", blockNum, expectedState, computedState) + return nil, fmt.Errorf("after block %v expected global state %v but got %v", initialCount, expectedState, computedState) } } - log.Info("issuing one step proof", "challenge", m.challengeIndex, "stepCount", stepCount, "blockNum", blockNum) + log.Info("issuing one step proof", "challenge", m.challengeIndex, "stepCount", stepCount, 
"initial count", initialCount) return m.blockChallengeBackend.IssueExecChallenge( m.challengeCore, state, diff --git a/staker/l1_validator.go b/staker/l1_validator.go index 141731d5c2..ece27dceb1 100644 --- a/staker/l1_validator.go +++ b/staker/l1_validator.go @@ -14,7 +14,6 @@ import ( "github.com/ethereum/go-ethereum/accounts/abi/bind" "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/core" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/log" @@ -41,16 +40,14 @@ const ( ) type L1Validator struct { - rollup *RollupWatcher - rollupAddress common.Address - validatorUtils *rollupgen.ValidatorUtils - client arbutil.L1Interface - builder *ValidatorTxBuilder - wallet ValidatorWalletInterface - callOpts bind.CallOpts - genesisBlockNumber uint64 - - l2Blockchain *core.BlockChain + rollup *RollupWatcher + rollupAddress common.Address + validatorUtils *rollupgen.ValidatorUtils + client arbutil.L1Interface + builder *ValidatorTxBuilder + wallet ValidatorWalletInterface + callOpts bind.CallOpts + das arbstate.DataAvailabilityReader inboxTracker InboxTrackerInterface txStreamer TransactionStreamerInterface @@ -63,7 +60,6 @@ func NewL1Validator( wallet ValidatorWalletInterface, validatorUtilsAddress common.Address, callOpts bind.CallOpts, - l2Blockchain *core.BlockChain, das arbstate.DataAvailabilityReader, inboxTracker InboxTrackerInterface, txStreamer TransactionStreamerInterface, @@ -84,24 +80,18 @@ func NewL1Validator( if err != nil { return nil, err } - genesisBlockNumber, err := txStreamer.GetGenesisBlockNumber() - if err != nil { - return nil, err - } return &L1Validator{ - rollup: rollup, - rollupAddress: wallet.RollupAddress(), - validatorUtils: validatorUtils, - client: client, - builder: builder, - wallet: wallet, - callOpts: callOpts, - genesisBlockNumber: genesisBlockNumber, - l2Blockchain: l2Blockchain, - das: das, - inboxTracker: inboxTracker, - txStreamer: txStreamer, - blockValidator: blockValidator, + rollup: rollup, + rollupAddress: wallet.RollupAddress(), + validatorUtils: validatorUtils, + client: client, + builder: builder, + wallet: wallet, + callOpts: callOpts, + das: das, + inboxTracker: inboxTracker, + txStreamer: txStreamer, + blockValidator: blockValidator, }, nil } @@ -223,35 +213,6 @@ type OurStakerInfo struct { *StakerInfo } -// Returns (block number, global state inbox position is invalid, error). -// If global state is invalid, block number is set to the last of the batch. -func (v *L1Validator) blockNumberFromGlobalState(gs validator.GoGlobalState) (int64, bool, error) { - var batchHeight arbutil.MessageIndex - if gs.Batch > 0 { - var err error - batchHeight, err = v.inboxTracker.GetBatchMessageCount(gs.Batch - 1) - if err != nil { - return 0, false, err - } - } - - // Validate the PosInBatch if it's non-zero - if gs.PosInBatch > 0 { - nextBatchHeight, err := v.inboxTracker.GetBatchMessageCount(gs.Batch) - if err != nil { - return 0, false, err - } - - if gs.PosInBatch >= uint64(nextBatchHeight-batchHeight) { - // This PosInBatch would enter the next batch. Return the last block before the next batch. - // We can be sure that MessageCountToBlockNumber will return a non-negative number as nextBatchHeight must be nonzero. 
- return arbutil.MessageCountToBlockNumber(nextBatchHeight, v.genesisBlockNumber), true, nil - } - } - - return arbutil.MessageCountToBlockNumber(batchHeight+arbutil.MessageIndex(gs.PosInBatch), v.genesisBlockNumber), false, nil -} - func (v *L1Validator) generateNodeAction(ctx context.Context, stakerInfo *OurStakerInfo, strategy StakerStrategy, makeAssertionInterval time.Duration) (nodeAction, bool, error) { startState, prevInboxMaxCount, startStateProposed, err := lookupNodeStartState(ctx, v.rollup, stakerInfo.LatestStakedNode, stakerInfo.LatestStakedNodeHash) if err != nil { @@ -271,69 +232,89 @@ func (v *L1Validator) generateNodeAction(ctx context.Context, stakerInfo *OurSta if err != nil { return nil, false, fmt.Errorf("error getting batch count from inbox tracker: %w", err) } - if localBatchCount < startState.RequiredBatches() { + if localBatchCount < startState.RequiredBatches() || localBatchCount == 0 { log.Info("catching up to chain batches", "localBatches", localBatchCount, "target", startState.RequiredBatches()) return nil, false, nil } - startBlock := v.l2Blockchain.GetBlockByHash(startState.GlobalState.BlockHash) - if startBlock == nil && (startState.GlobalState != validator.GoGlobalState{}) { - expectedBlockHeight, inboxPositionInvalid, err := v.blockNumberFromGlobalState(startState.GlobalState) - if err != nil { - return nil, false, fmt.Errorf("error getting block number from global state: %w", err) + caughtUp, startCount, err := GlobalStateToMsgCount(v.inboxTracker, v.txStreamer, startState.GlobalState) + if err != nil { + return nil, false, err + } + if !caughtUp { + target := GlobalStatePosition{ + BatchNumber: startState.GlobalState.Batch, + PosInBatch: startState.GlobalState.PosInBatch, } - if inboxPositionInvalid { - log.Error("invalid start global state inbox position", startState.GlobalState.BlockHash, "batch", startState.GlobalState.Batch, "pos", startState.GlobalState.PosInBatch) - return nil, false, errors.New("invalid start global state inbox position") + var current GlobalStatePosition + head, err := v.txStreamer.GetProcessedMessageCount() + if err != nil { + _, current, err = v.blockValidator.GlobalStatePositionsAtCount(head) } - latestHeader := v.l2Blockchain.CurrentBlock().Header() - if latestHeader.Number.Int64() < expectedBlockHeight { - log.Info("catching up to chain blocks", "localBlocks", latestHeader.Number, "target", expectedBlockHeight) - return nil, false, nil + if err != nil { + log.Info("catching up to chain messages", "target", target) } else { - log.Error("unknown start block hash", "hash", startState.GlobalState.BlockHash, "batch", startState.GlobalState.Batch, "pos", startState.GlobalState.PosInBatch) - return nil, false, errors.New("unknown start block hash") + log.Info("catching up to chain blocks", "target", target, "current", current) } + return nil, false, err } - var lastBlockValidated uint64 + var validatedCount arbutil.MessageIndex + var validatedGlobalState validator.GoGlobalState if v.blockValidator != nil { - var expectedHash common.Hash - var validRoots []common.Hash - lastBlockValidated, expectedHash, validRoots = v.blockValidator.LastBlockValidatedAndHash() - haveHash := v.l2Blockchain.GetCanonicalHash(lastBlockValidated) - if haveHash != expectedHash { - return nil, false, fmt.Errorf("block validator validated block %v as hash %v but blockchain has hash %v", lastBlockValidated, expectedHash, haveHash) + valInfo, err := v.blockValidator.ReadLastValidatedInfo() + if err != nil { + return nil, false, err + } + validatedGlobalState = 
valInfo.GlobalState + caughtUp, validatedCount, err = GlobalStateToMsgCount(v.inboxTracker, v.txStreamer, valInfo.GlobalState) + if err != nil { + return nil, false, fmt.Errorf("%w: not found validated block in blockchain", err) + } + if !caughtUp { + log.Info("catching up to laste validated block", "target", valInfo.GlobalState) } if err := v.updateBlockValidatorModuleRoot(ctx); err != nil { return nil, false, fmt.Errorf("error updating block validator module root: %w", err) } wasmRootValid := false - for _, root := range validRoots { + for _, root := range valInfo.WasmRoots { if v.lastWasmModuleRoot == root { wasmRootValid = true break } } if !wasmRootValid { - return nil, false, fmt.Errorf("wasmroot doesn't match rollup : %v, valid: %v", v.lastWasmModuleRoot, validRoots) + return nil, false, fmt.Errorf("wasmroot doesn't match rollup : %v, valid: %v", v.lastWasmModuleRoot, valInfo.WasmRoots) } } else { - lastBlockValidated = v.l2Blockchain.CurrentBlock().Header().Number.Uint64() - - if localBatchCount > 0 { - messageCount, err := v.inboxTracker.GetBatchMessageCount(localBatchCount - 1) + validatedCount, err = v.txStreamer.GetProcessedMessageCount() + if err != nil || validatedCount == 0 { + return nil, false, err + } + var batchNum uint64 + messageCount, err := v.inboxTracker.GetBatchMessageCount(localBatchCount - 1) + if err != nil { + return nil, false, fmt.Errorf("error getting latest batch %v message count: %w", localBatchCount-1, err) + } + if validatedCount >= messageCount { + batchNum = localBatchCount - 1 + validatedCount = messageCount + } else { + batchNum, err = FindBatchContainingMessageIndex(v.inboxTracker, validatedCount-1, localBatchCount) if err != nil { - return nil, false, fmt.Errorf("error getting latest batch %v message count: %w", localBatchCount-1, err) - } - // Must be non-negative as a batch must contain at least one message - lastBatchBlock := uint64(arbutil.MessageCountToBlockNumber(messageCount, v.genesisBlockNumber)) - if lastBlockValidated > lastBatchBlock { - lastBlockValidated = lastBatchBlock + return nil, false, err } - } else { - lastBlockValidated = 0 } + execResult, err := v.txStreamer.ResultAtCount(validatedCount) + if err != nil { + return nil, false, err + } + _, gsPos, err := GlobalStatePositionsAtCount(v.inboxTracker, validatedCount, batchNum) + if err != nil { + return nil, false, fmt.Errorf("%w: failed calculating GSposition for count %d", err, validatedCount) + } + validatedGlobalState = buildGlobalState(*execResult, gsPos) } currentL1BlockNum, err := v.client.BlockNumber(ctx) @@ -367,84 +348,50 @@ func (v *L1Validator) generateNodeAction(ctx context.Context, stakerInfo *OurSta // We've found everything we could hope to find break } - if correctNode == nil { - afterGs := nd.AfterState().GlobalState - requiredBatches := nd.AfterState().RequiredBatches() - if localBatchCount < requiredBatches { - return nil, false, fmt.Errorf("waiting for validator to catch up to assertion batches: %v/%v", localBatchCount, requiredBatches) - } - if requiredBatches > 0 { - haveAcc, err := v.inboxTracker.GetBatchAcc(requiredBatches - 1) - if err != nil { - return nil, false, fmt.Errorf("error getting batch %v accumulator: %w", requiredBatches-1, err) - } - if haveAcc != nd.AfterInboxBatchAcc { - return nil, false, fmt.Errorf("missed sequencer batches reorg: at seq num %v have acc %v but assertion has acc %v", requiredBatches-1, haveAcc, nd.AfterInboxBatchAcc) - } - } - lastBlockNum, inboxPositionInvalid, err := v.blockNumberFromGlobalState(afterGs) + if correctNode 
!= nil { + log.Error("found younger sibling to correct assertion (implicitly invalid)", "node", nd.NodeNum) + wrongNodesExist = true + continue + } + afterGs := nd.AfterState().GlobalState + requiredBatches := nd.AfterState().RequiredBatches() + if localBatchCount < requiredBatches { + return nil, false, fmt.Errorf("waiting for validator to catch up to assertion batches: %v/%v", localBatchCount, requiredBatches) + } + if requiredBatches > 0 { + haveAcc, err := v.inboxTracker.GetBatchAcc(requiredBatches - 1) if err != nil { - return nil, false, fmt.Errorf("error getting block number from global state: %w", err) - } - if int64(lastBlockValidated) < lastBlockNum { - return nil, false, fmt.Errorf("waiting for validator to catch up to assertion blocks: %v/%v", lastBlockValidated, lastBlockNum) - } - var expectedBlockHash common.Hash - var expectedSendRoot common.Hash - if lastBlockNum >= 0 { - lastBlock := v.l2Blockchain.GetBlockByNumber(uint64(lastBlockNum)) - if lastBlock == nil { - return nil, false, fmt.Errorf("block %v not in database despite being validated", lastBlockNum) - } - lastBlockExtra, err := types.DeserializeHeaderExtraInformation(lastBlock.Header()) - if err != nil { - return nil, false, fmt.Errorf("error getting block %v header extra info: %w", lastBlockNum, err) - } - expectedBlockHash = lastBlock.Hash() - expectedSendRoot = lastBlockExtra.SendRoot - } - - var expectedNumBlocks uint64 - if startBlock == nil { - expectedNumBlocks = uint64(lastBlockNum + 1) - } else { - expectedNumBlocks = uint64(lastBlockNum) - startBlock.NumberU64() + return nil, false, fmt.Errorf("%w: error getting batch %v accumulator: localBatchCount: %d", err, requiredBatches-1, localBatchCount) } - valid := !inboxPositionInvalid && - nd.Assertion.NumBlocks == expectedNumBlocks && - afterGs.BlockHash == expectedBlockHash && - afterGs.SendRoot == expectedSendRoot - if valid { - log.Info( - "found correct assertion", - "node", nd.NodeNum, - "blockNum", lastBlockNum, - "blockHash", afterGs.BlockHash, - ) - correctNode = existingNodeAction{ - number: nd.NodeNum, - hash: nd.NodeHash, - } - continue - } else { - log.Error( - "found incorrect assertion", - "node", nd.NodeNum, - "inboxPositionInvalid", inboxPositionInvalid, - "computedBlockNum", lastBlockNum, - "numBlocks", nd.Assertion.NumBlocks, - "expectedNumBlocks", expectedNumBlocks, - "blockHash", afterGs.BlockHash, - "expectedBlockHash", expectedBlockHash, - "sendRoot", afterGs.SendRoot, - "expectedSendRoot", expectedSendRoot, - ) + if haveAcc != nd.AfterInboxBatchAcc { + return nil, false, fmt.Errorf("missed sequencer batches reorg: at seq num %v have acc %v but assertion has acc %v", requiredBatches-1, haveAcc, nd.AfterInboxBatchAcc) } - } else { - log.Error("found younger sibling to correct assertion (implicitly invalid)", "node", nd.NodeNum) } - // If we've hit this point, the node is "wrong" - wrongNodesExist = true + caughtUp, nodeMsgCount, err := GlobalStateToMsgCount(v.inboxTracker, v.txStreamer, startState.GlobalState) + if errors.Is(err, ErrGlobalStateNotInChain) { + wrongNodesExist = true + log.Error("Found incorrect assertion", "err", err) + continue + } + if err != nil { + return nil, false, fmt.Errorf("error getting block number from global state: %w", err) + } + if !caughtUp { + return nil, false, fmt.Errorf("waiting for validator to catch up to assertion blocks. 
Current: %d target: %v", validatedCount, startState.GlobalState) + } + if validatedCount < nodeMsgCount { + return nil, false, fmt.Errorf("waiting for validator to catch up to assertion blocks. %d / %d", validatedCount, nodeMsgCount) + } + log.Info( + "found correct assertion", + "node", nd.NodeNum, + "count", validatedCount, + "blockHash", afterGs.BlockHash, + ) + correctNode = existingNodeAction{ + number: nd.NodeNum, + hash: nd.NodeHash, + } } if correctNode != nil || strategy == WatchtowerStrategy { @@ -457,9 +404,9 @@ func (v *L1Validator) generateNodeAction(ctx context.Context, stakerInfo *OurSta if len(successorNodes) > 0 { lastNodeHashIfExists = &successorNodes[len(successorNodes)-1].NodeHash } - action, err := v.createNewNodeAction(ctx, stakerInfo, lastBlockValidated, localBatchCount, prevInboxMaxCount, startBlock, startState, lastNodeHashIfExists) + action, err := v.createNewNodeAction(ctx, stakerInfo, localBatchCount, prevInboxMaxCount, startCount, startState, validatedCount, validatedGlobalState, lastNodeHashIfExists) if err != nil { - return nil, wrongNodesExist, fmt.Errorf("error generating create new node action (from start block %v to last block validated %v): %w", startBlock, lastBlockValidated, err) + return nil, wrongNodesExist, fmt.Errorf("error generating create new node action (from pos %d to %d): %w", startCount, validatedCount, err) } return action, wrongNodesExist, nil } @@ -470,11 +417,12 @@ func (v *L1Validator) generateNodeAction(ctx context.Context, stakerInfo *OurSta func (v *L1Validator) createNewNodeAction( ctx context.Context, stakerInfo *OurStakerInfo, - lastBlockValidated uint64, localBatchCount uint64, prevInboxMaxCount *big.Int, - startBlock *types.Block, + startCount arbutil.MessageIndex, startState *validator.ExecutionState, + validatedCount arbutil.MessageIndex, + validatedGS validator.GoGlobalState, lastNodeHashIfExists *common.Hash, ) (nodeAction, error) { if !prevInboxMaxCount.IsUint64() { @@ -490,61 +438,21 @@ func (v *L1Validator) createNewNodeAction( // we haven't validated anything return nil, nil } - if startBlock != nil && lastBlockValidated <= startBlock.NumberU64() { + if validatedCount < startCount { // we haven't validated any new blocks return nil, nil } - var assertionCoversBatch uint64 - var afterGsBatch uint64 - var afterGsPosInBatch uint64 - for i := localBatchCount - 1; i+1 >= minBatchCount && i > 0; i-- { - batchMessageCount, err := v.inboxTracker.GetBatchMessageCount(i) - if err != nil { - return nil, fmt.Errorf("error getting batch %v message count: %w", i, err) - } - prevBatchMessageCount, err := v.inboxTracker.GetBatchMessageCount(i - 1) - if err != nil { - return nil, fmt.Errorf("error getting previous batch %v message count: %w", i-1, err) - } - // Must be non-negative as a batch must contain at least one message - lastBlockNum := uint64(arbutil.MessageCountToBlockNumber(batchMessageCount, v.genesisBlockNumber)) - prevBlockNum := uint64(arbutil.MessageCountToBlockNumber(prevBatchMessageCount, v.genesisBlockNumber)) - if lastBlockValidated > lastBlockNum { - return nil, fmt.Errorf("%v blocks have been validated but only %v appear in the latest batch", lastBlockValidated, lastBlockNum) - } - if lastBlockValidated > prevBlockNum { - // We found the batch containing the last validated block - if i+1 == minBatchCount && lastBlockValidated < lastBlockNum { - // We haven't reached the minimum assertion size yet - break - } - assertionCoversBatch = i - if lastBlockValidated < lastBlockNum { - afterGsBatch = i - afterGsPosInBatch = 
lastBlockValidated - prevBlockNum - } else { - afterGsBatch = i + 1 - afterGsPosInBatch = 0 - } - break - } - } - if assertionCoversBatch == 0 { - // we haven't validated the next batch completely + if validatedGS.Batch < minBatchCount { + // didn't validate enough batches return nil, nil } - validatedBatchAcc, err := v.inboxTracker.GetBatchAcc(assertionCoversBatch) - if err != nil { - return nil, fmt.Errorf("error getting batch %v accumulator: %w", assertionCoversBatch, err) - } - - assertingBlock := v.l2Blockchain.GetBlockByNumber(lastBlockValidated) - if assertingBlock == nil { - return nil, fmt.Errorf("missing validated block %v", lastBlockValidated) + batchValidated := validatedGS.Batch + if validatedGS.PosInBatch == 0 { + batchValidated-- } - assertingBlockExtra, err := types.DeserializeHeaderExtraInformation(assertingBlock.Header()) + validatedBatchAcc, err := v.inboxTracker.GetBatchAcc(batchValidated) if err != nil { - return nil, fmt.Errorf("error getting asserting block %v header extra info: %w", assertingBlock.Number(), err) + return nil, fmt.Errorf("error getting batch %v accumulator: %w", batchValidated, err) } hasSiblingByte := [1]byte{0} @@ -554,21 +462,11 @@ func (v *L1Validator) createNewNodeAction( lastHash = *lastNodeHashIfExists hasSiblingByte[0] = 1 } - var assertionNumBlocks uint64 - if startBlock == nil { - assertionNumBlocks = assertingBlock.NumberU64() + 1 - } else { - assertionNumBlocks = assertingBlock.NumberU64() - startBlock.NumberU64() - } + assertionNumBlocks := uint64(validatedCount - startCount) assertion := &Assertion{ BeforeState: startState, AfterState: &validator.ExecutionState{ - GlobalState: validator.GoGlobalState{ - BlockHash: assertingBlock.Hash(), - SendRoot: assertingBlockExtra.SendRoot, - Batch: afterGsBatch, - PosInBatch: afterGsPosInBatch, - }, + GlobalState: validatedGS, MachineStatus: validator.MachineStatusFinished, }, NumBlocks: assertionNumBlocks, diff --git a/staker/staker.go b/staker/staker.go index 6a143ba568..2f7d7b433a 100644 --- a/staker/staker.go +++ b/staker/staker.go @@ -165,7 +165,7 @@ func NewStaker( } client := l1Reader.Client() val, err := NewL1Validator(client, wallet, validatorUtilsAddress, callOpts, - statelessBlockValidator.blockchain, statelessBlockValidator.daService, statelessBlockValidator.inboxTracker, statelessBlockValidator.streamer, blockValidator) + statelessBlockValidator.daService, statelessBlockValidator.inboxTracker, statelessBlockValidator.streamer, blockValidator) if err != nil { return nil, err } @@ -515,8 +515,6 @@ func (s *Staker) handleConflict(ctx context.Context, info *StakerInfo) error { *s.builder.wallet.Address(), s.wallet.ChallengeManagerAddress(), *info.CurrentChallenge, - s.l2Blockchain, - s.inboxTracker, s.statelessBlockValidator, latestConfirmedCreated, s.config.ConfirmationBlocks, diff --git a/staker/stateless_block_validator.go b/staker/stateless_block_validator.go index 43b5b4a303..e45e9100f8 100644 --- a/staker/stateless_block_validator.go +++ b/staker/stateless_block_validator.go @@ -8,20 +8,17 @@ import ( "fmt" "sync" + "github.com/offchainlabs/nitro/arbnode/execution" "github.com/offchainlabs/nitro/util/signature" "github.com/offchainlabs/nitro/validator/server_api" "github.com/offchainlabs/nitro/arbutil" "github.com/offchainlabs/nitro/validator" - "github.com/ethereum/go-ethereum/arbitrum" "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/core" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/ethdb" 
"github.com/ethereum/go-ethereum/log" - "github.com/offchainlabs/nitro/arbos" - "github.com/offchainlabs/nitro/arbos/arbosState" "github.com/offchainlabs/nitro/arbos/arbostypes" "github.com/offchainlabs/nitro/arbstate" "github.com/pkg/errors" @@ -33,14 +30,13 @@ type StatelessBlockValidator struct { execSpawner validator.ExecutionSpawner validationSpawners []validator.ValidationSpawner - inboxReader InboxReaderInterface - inboxTracker InboxTrackerInterface - streamer TransactionStreamerInterface - blockchain *core.BlockChain - db ethdb.Database - daService arbstate.DataAvailabilityReader - genesisBlockNum uint64 - recordingDatabase *arbitrum.RecordingDatabase + recorder *execution.BlockRecorder + + inboxReader InboxReaderInterface + inboxTracker InboxTrackerInterface + streamer TransactionStreamerInterface + db ethdb.Database + daService arbstate.DataAvailabilityReader moduleMutex sync.Mutex currentWasmModuleRoot common.Hash @@ -61,8 +57,9 @@ type InboxTrackerInterface interface { type TransactionStreamerInterface interface { BlockValidatorRegistrer + GetProcessedMessageCount() (arbutil.MessageIndex, error) GetMessage(seqNum arbutil.MessageIndex) (*arbostypes.MessageWithMetadata, error) - GetGenesisBlockNumber() (uint64, error) + ResultAtCount(count arbutil.MessageIndex) (*execution.MessageResult, error) PauseReorgs() ResumeReorgs() } @@ -82,9 +79,9 @@ type GlobalStatePosition struct { PosInBatch uint64 } -func GlobalStatePositionsFor( +func GlobalStatePositionsAtCount( tracker InboxTrackerInterface, - pos arbutil.MessageIndex, + count arbutil.MessageIndex, batch uint64, ) (GlobalStatePosition, GlobalStatePosition, error) { msgCountInBatch, err := tracker.GetBatchMessageCount(batch) @@ -98,17 +95,18 @@ func GlobalStatePositionsFor( return GlobalStatePosition{}, GlobalStatePosition{}, err } } - if msgCountInBatch <= pos { - return GlobalStatePosition{}, GlobalStatePosition{}, fmt.Errorf("batch %d has up to message %d, failed getting for %d", batch, msgCountInBatch-1, pos) + if msgCountInBatch < count { + return GlobalStatePosition{}, GlobalStatePosition{}, fmt.Errorf("batch %d has msgCount %d, failed getting for %d", batch, msgCountInBatch-1, count) } - if firstInBatch > pos { - return GlobalStatePosition{}, GlobalStatePosition{}, fmt.Errorf("batch %d starts from %d, failed getting for %d", batch, firstInBatch, pos) + if firstInBatch >= count { + return GlobalStatePosition{}, GlobalStatePosition{}, fmt.Errorf("batch %d starts from %d, failed getting for %d", batch, firstInBatch, count) } - startPos := GlobalStatePosition{batch, uint64(pos - firstInBatch)} - if msgCountInBatch == pos+1 { + posInBatch := uint64(count - firstInBatch - 1) + startPos := GlobalStatePosition{batch, posInBatch} + if msgCountInBatch == count { return startPos, GlobalStatePosition{batch + 1, 0}, nil } - return startPos, GlobalStatePosition{batch, uint64(pos + 1 - firstInBatch)}, nil + return startPos, GlobalStatePosition{batch, posInBatch + 1}, nil } func FindBatchContainingMessageIndex( @@ -149,138 +147,74 @@ type ValidationEntryStage uint32 const ( Empty ValidationEntryStage = iota ReadyForRecord - Recorded Ready ) type validationEntry struct { Stage ValidationEntryStage // Valid since ReadyforRecord: - BlockNumber uint64 - PrevBlockHash common.Hash - PrevBlockHeader *types.Header - BlockHash common.Hash - BlockHeader *types.Header - HasDelayedMsg bool - DelayedMsgNr uint64 - msg *arbostypes.MessageWithMetadata - // Valid since Recorded: + Pos arbutil.MessageIndex + Start validator.GoGlobalState + End 
validator.GoGlobalState + HasDelayedMsg bool + DelayedMsgNr uint64 + // valid when created, removed after recording + msg *arbostypes.MessageWithMetadata + // Has batch when created - others could be added on record + BatchInfo []validator.BatchInfo + // Valid since Recorded Preimages map[common.Hash][]byte - BatchInfo []validator.BatchInfo DelayedMsg []byte - // Valid since Ready: - StartPosition GlobalStatePosition - EndPosition GlobalStatePosition -} - -func (v *validationEntry) start() (validator.GoGlobalState, error) { - start := v.StartPosition - prevExtraInfo, err := types.DeserializeHeaderExtraInformation(v.PrevBlockHeader) - if err != nil { - return validator.GoGlobalState{}, err - } - return validator.GoGlobalState{ - Batch: start.BatchNumber, - PosInBatch: start.PosInBatch, - BlockHash: v.PrevBlockHash, - SendRoot: prevExtraInfo.SendRoot, - }, nil -} - -func (v *validationEntry) expectedEnd() (validator.GoGlobalState, error) { - extraInfo, err := types.DeserializeHeaderExtraInformation(v.BlockHeader) - if err != nil { - return validator.GoGlobalState{}, err - } - end := v.EndPosition - return validator.GoGlobalState{ - Batch: end.BatchNumber, - PosInBatch: end.PosInBatch, - BlockHash: v.BlockHash, - SendRoot: extraInfo.SendRoot, - }, nil } func (e *validationEntry) ToInput() (*validator.ValidationInput, error) { if e.Stage != Ready { return nil, errors.New("cannot create input from non-ready entry") } - startState, err := e.start() - if err != nil { - return nil, err - } return &validator.ValidationInput{ - Id: e.BlockNumber, + Id: uint64(e.Pos), HasDelayedMsg: e.HasDelayedMsg, DelayedMsgNr: e.DelayedMsgNr, Preimages: e.Preimages, BatchInfo: e.BatchInfo, DelayedMsg: e.DelayedMsg, - StartState: startState, + StartState: e.Start, }, nil } -func usingDelayedMsg(prevHeader *types.Header, header *types.Header) (bool, uint64) { - if prevHeader == nil { - return true, 0 - } - if header.Nonce == prevHeader.Nonce { - return false, 0 - } - return true, prevHeader.Nonce.Uint64() -} - func newValidationEntry( - prevHeader *types.Header, - header *types.Header, + pos arbutil.MessageIndex, + start validator.GoGlobalState, + end validator.GoGlobalState, msg *arbostypes.MessageWithMetadata, + batch []byte, + prevDelayed uint64, ) (*validationEntry, error) { - hasDelayedMsg, delayedMsgNr := usingDelayedMsg(prevHeader, header) + batchInfo := validator.BatchInfo{ + Number: start.Batch, + Data: batch, + } return &validationEntry{ - Stage: ReadyForRecord, - BlockNumber: header.Number.Uint64(), - PrevBlockHash: prevHeader.Hash(), - PrevBlockHeader: prevHeader, - BlockHash: header.Hash(), - BlockHeader: header, - HasDelayedMsg: hasDelayedMsg, - DelayedMsgNr: delayedMsgNr, - msg: msg, + Stage: ReadyForRecord, + Pos: pos, + Start: start, + End: end, + HasDelayedMsg: (msg.DelayedMessagesRead > prevDelayed), + DelayedMsgNr: msg.DelayedMessagesRead, + msg: msg, + BatchInfo: []validator.BatchInfo{batchInfo}, }, nil } -func newRecordedValidationEntry( - prevHeader *types.Header, - header *types.Header, - preimages map[common.Hash][]byte, - batchInfos []validator.BatchInfo, - delayedMsg []byte, -) (*validationEntry, error) { - entry, err := newValidationEntry(prevHeader, header, nil) - if err != nil { - return nil, err - } - entry.Preimages = preimages - entry.BatchInfo = batchInfos - entry.DelayedMsg = delayedMsg - entry.Stage = Recorded - return entry, nil -} - func NewStatelessBlockValidator( inboxReader InboxReaderInterface, inbox InboxTrackerInterface, streamer TransactionStreamerInterface, - blockchain 
*core.BlockChain, - blockchainDb ethdb.Database, + recorder *execution.BlockRecorder, arbdb ethdb.Database, das arbstate.DataAvailabilityReader, config *BlockValidatorConfig, ) (*StatelessBlockValidator, error) { - genesisBlockNum, err := streamer.GetGenesisBlockNumber() - if err != nil { - return nil, err - } var jwt []byte if config.JWTSecret != "" { jwtHash, err := signature.LoadSigningKey(config.JWTSecret) @@ -294,15 +228,13 @@ func NewStatelessBlockValidator( validator := &StatelessBlockValidator{ config: config, execSpawner: execClient, + recorder: recorder, validationSpawners: []validator.ValidationSpawner{valClient}, inboxReader: inboxReader, inboxTracker: inbox, streamer: streamer, - blockchain: blockchain, db: arbdb, daService: das, - genesisBlockNum: genesisBlockNum, - recordingDatabase: arbitrum.NewRecordingDatabase(blockchainDb, blockchain), } return validator, nil } @@ -318,154 +250,41 @@ func (v *StatelessBlockValidator) GetModuleRootsToValidate() []common.Hash { return validatingModuleRoots } -func stateLogFunc(targetHeader, header *types.Header, hasState bool) { - if targetHeader == nil || header == nil { - return - } - gap := targetHeader.Number.Int64() - header.Number.Int64() - step := int64(500) - stage := "computing state" - if !hasState { - step = 3000 - stage = "looking for full block" - } - if (gap >= step) && (gap%step == 0) { - log.Info("Setting up validation", "stage", stage, "current", header.Number, "target", targetHeader.Number) - } -} - -// If msg is nil, this will record block creation up to the point where message would be accessed (for a "too far" proof) -// If keepreference == true, reference to state of prevHeader is added (no reference added if an error is returned) -func (v *StatelessBlockValidator) RecordBlockCreation( - ctx context.Context, - prevHeader *types.Header, - msg *arbostypes.MessageWithMetadata, - keepReference bool, -) (common.Hash, map[common.Hash][]byte, []validator.BatchInfo, error) { - - recordingdb, chaincontext, recordingKV, err := v.recordingDatabase.PrepareRecording(ctx, prevHeader, stateLogFunc) - if err != nil { - return common.Hash{}, nil, nil, err - } - defer func() { v.recordingDatabase.Dereference(prevHeader) }() - - chainConfig := v.blockchain.Config() - - // Get the chain ID, both to validate and because the replay binary also gets the chain ID, - // so we need to populate the recordingdb with preimages for retrieving the chain ID. 
- if prevHeader != nil { - initialArbosState, err := arbosState.OpenSystemArbosState(recordingdb, nil, true) - if err != nil { - return common.Hash{}, nil, nil, fmt.Errorf("error opening initial ArbOS state: %w", err) - } - chainId, err := initialArbosState.ChainId() - if err != nil { - return common.Hash{}, nil, nil, fmt.Errorf("error getting chain ID from initial ArbOS state: %w", err) - } - if chainId.Cmp(chainConfig.ChainID) != 0 { - return common.Hash{}, nil, nil, fmt.Errorf("unexpected chain ID %v in ArbOS state, expected %v", chainId, chainConfig.ChainID) - } - genesisNum, err := initialArbosState.GenesisBlockNum() - if err != nil { - return common.Hash{}, nil, nil, fmt.Errorf("error getting genesis block number from initial ArbOS state: %w", err) - } - expectedNum := chainConfig.ArbitrumChainParams.GenesisBlockNum - if genesisNum != expectedNum { - return common.Hash{}, nil, nil, fmt.Errorf("unexpected genesis block number %v in ArbOS state, expected %v", genesisNum, expectedNum) - } - } - - var blockHash common.Hash - var readBatchInfo []validator.BatchInfo - if msg != nil { - batchFetcher := func(batchNum uint64) ([]byte, error) { - data, err := v.inboxReader.GetSequencerMessageBytes(ctx, batchNum) - if err != nil { - return nil, err - } - readBatchInfo = append(readBatchInfo, validator.BatchInfo{ - Number: batchNum, - Data: data, - }) - return data, nil - } - // Re-fetch the batch instead of using our cached cost, - // as the replay binary won't have the cache populated. - msg.Message.BatchGasCost = nil - block, _, err := arbos.ProduceBlock( - msg.Message, - msg.DelayedMessagesRead, - prevHeader, - recordingdb, - chaincontext, - chainConfig, - batchFetcher, - ) - if err != nil { - return common.Hash{}, nil, nil, err - } - blockHash = block.Hash() - } - - preimages, err := v.recordingDatabase.PreimagesFromRecording(chaincontext, recordingKV) - if err != nil { - return common.Hash{}, nil, nil, err - } - if keepReference { - prevHeader = nil - } - return blockHash, preimages, readBatchInfo, err -} - -func (v *StatelessBlockValidator) ValidationEntryRecord(ctx context.Context, e *validationEntry, keepReference bool) error { +func (v *StatelessBlockValidator) ValidationEntryRecord(ctx context.Context, e *validationEntry) error { if e.Stage != ReadyForRecord { return errors.Errorf("validation entry should be ReadyForRecord, is: %v", e.Stage) } - if e.PrevBlockHeader == nil { - e.Stage = Recorded + // nothing to record for genesis + if e.Pos == 0 { + e.Stage = Ready return nil } - blockhash, preimages, readBatchInfo, err := v.RecordBlockCreation(ctx, e.PrevBlockHeader, e.msg, keepReference) + recording, err := v.recorder.RecordBlockCreation(ctx, e.Pos, e.msg) if err != nil { return err } - if blockhash != e.BlockHash { - return fmt.Errorf("recording failed: blockNum %d, hash expected %v, got %v", e.BlockNumber, e.BlockHash, blockhash) + if recording.BlockHash != e.End.BlockHash { + return fmt.Errorf("recording failed: pos %d, hash expected %v, got %v", e.Pos, e.End.BlockHash, recording.BlockHash) } if e.HasDelayedMsg { delayedMsg, err := v.inboxTracker.GetDelayedMessageBytes(e.DelayedMsgNr) if err != nil { log.Error( "error while trying to read delayed msg for proving", - "err", err, "seq", e.DelayedMsgNr, "blockNr", e.BlockNumber, + "err", err, "seq", e.DelayedMsgNr, "pos", e.Pos, ) return fmt.Errorf("error while trying to read delayed msg for proving: %w", err) } e.DelayedMsg = delayedMsg } - e.Preimages = preimages - e.BatchInfo = readBatchInfo - e.msg = nil // no longer needed - 
e.Stage = Recorded - return nil -} -func (v *StatelessBlockValidator) ValidationEntryAddSeqMessage(ctx context.Context, e *validationEntry, - startPos, endPos GlobalStatePosition, seqMsg []byte) error { - if e.Stage != Recorded { - return fmt.Errorf("validation entry stage should be Recorded, is: %v", e.Stage) - } - if e.Preimages == nil { + e.BatchInfo = append(e.BatchInfo, recording.BatchInfo...) + + if recording.Preimages != nil { + e.Preimages = recording.Preimages + } else { e.Preimages = make(map[common.Hash][]byte) } - e.StartPosition = startPos - e.EndPosition = endPos - seqMsgBatchInfo := validator.BatchInfo{ - Number: startPos.BatchNumber, - Data: seqMsg, - } - e.BatchInfo = append(e.BatchInfo, seqMsgBatchInfo) - for _, batch := range e.BatchInfo { if len(batch.Data) <= 40 { continue @@ -474,10 +293,7 @@ func (v *StatelessBlockValidator) ValidationEntryAddSeqMessage(ctx context.Conte continue } if v.daService == nil { - log.Error("No DAS configured, but sequencer message found with DAS header") - if v.blockchain.Config().ArbitrumChainParams.DataAvailabilityCommittee { - return errors.New("processing data availability chain without DAS configured") - } + log.Warn("No DAS configured, but sequencer message found with DAS header") } else { _, err := arbstate.RecoverPayloadFromDasBatch( ctx, batch.Number, batch.Data, v.daService, e.Preimages, arbstate.KeysetValidate, @@ -487,67 +303,72 @@ func (v *StatelessBlockValidator) ValidationEntryAddSeqMessage(ctx context.Conte } } } + + e.msg = nil // no longer needed e.Stage = Ready return nil } -func (v *StatelessBlockValidator) CreateReadyValidationEntry(ctx context.Context, header *types.Header) (*validationEntry, error) { - if header == nil { - return nil, errors.New("header not found") - } - blockNum := header.Number.Uint64() - msgIndex := arbutil.BlockNumberToMessageCount(blockNum, v.genesisBlockNum) - 1 - prevHeader := v.blockchain.GetHeaderByNumber(blockNum - 1) - if prevHeader == nil { - return nil, errors.New("prev header not found") +func buildGlobalState(res execution.MessageResult, pos GlobalStatePosition) validator.GoGlobalState { + return validator.GoGlobalState{ + BlockHash: res.BlockHash, + SendRoot: res.SendRoot, + Batch: pos.BatchNumber, + PosInBatch: pos.PosInBatch, } - if header.ParentHash != prevHeader.Hash() { - return nil, fmt.Errorf("hashes don't match block %d hash %v parent %v prev-found %v", - blockNum, header.Hash(), header.ParentHash, prevHeader.Hash()) +} + +func (v *StatelessBlockValidator) GlobalStatePositionsAtCount(count arbutil.MessageIndex) (GlobalStatePosition, GlobalStatePosition, error) { + if count == 0 { + return GlobalStatePosition{}, GlobalStatePosition{1, 0}, nil } - msg, err := v.streamer.GetMessage(msgIndex) + batchCount, err := v.inboxTracker.GetBatchCount() if err != nil { - return nil, err + return GlobalStatePosition{}, GlobalStatePosition{}, err } - resHash, preimages, readBatchInfo, err := v.RecordBlockCreation(ctx, prevHeader, msg, false) + batch, err := FindBatchContainingMessageIndex(v.inboxTracker, count-1, batchCount) if err != nil { - return nil, fmt.Errorf("failed to get block data to validate: %w", err) - } - if resHash != header.Hash() { - return nil, fmt.Errorf("wrong hash expected %s got %s", header.Hash(), resHash) + return GlobalStatePosition{}, GlobalStatePosition{}, err } - batchCount, err := v.inboxTracker.GetBatchCount() + return GlobalStatePositionsAtCount(v.inboxTracker, count, batch) +} + +func (v *StatelessBlockValidator) CreateReadyValidationEntry(ctx context.Context, 
pos arbutil.MessageIndex) (*validationEntry, error) { + msg, err := v.streamer.GetMessage(pos) if err != nil { return nil, err } - batch, err := FindBatchContainingMessageIndex(v.inboxTracker, msgIndex, batchCount) + result, err := v.streamer.ResultAtCount(pos + 1) if err != nil { return nil, err } - - startPos, endPos, err := GlobalStatePositionsFor(v.inboxTracker, msgIndex, batch) - if err != nil { - return nil, fmt.Errorf("failed calculating position for validation: %w", err) - } - - usingDelayed, delaydNr := usingDelayedMsg(prevHeader, header) - var delayed []byte - if usingDelayed { - delayed, err = v.inboxTracker.GetDelayedMessageBytes(delaydNr) + var prevDelayed uint64 + if pos > 0 { + prev, err := v.streamer.GetMessage(pos - 1) if err != nil { - return nil, fmt.Errorf("error while trying to read delayed msg for proving: %w", err) + return nil, err } + prevDelayed = prev.DelayedMessagesRead } - entry, err := newRecordedValidationEntry(prevHeader, header, preimages, readBatchInfo, delayed) + prevResult, err := v.streamer.ResultAtCount(pos) if err != nil { - return nil, fmt.Errorf("failed to create validation entry %w", err) + return nil, err } - + startPos, endPos, err := v.GlobalStatePositionsAtCount(pos + 1) + if err != nil { + return nil, fmt.Errorf("failed calculating position for validation: %w", err) + } + start := buildGlobalState(*prevResult, startPos) + end := buildGlobalState(*result, endPos) seqMsg, err := v.inboxReader.GetSequencerMessageBytes(ctx, startPos.BatchNumber) if err != nil { return nil, err } - err = v.ValidationEntryAddSeqMessage(ctx, entry, startPos, endPos, seqMsg) + entry, err := newValidationEntry(pos, start, end, msg, seqMsg, prevDelayed) + if err != nil { + return nil, err + } + err = v.ValidationEntryRecord(ctx, entry) if err != nil { return nil, err } @@ -556,13 +377,9 @@ func (v *StatelessBlockValidator) CreateReadyValidationEntry(ctx context.Context } func (v *StatelessBlockValidator) ValidateBlock( - ctx context.Context, header *types.Header, useExec bool, moduleRoot common.Hash, + ctx context.Context, pos arbutil.MessageIndex, useExec bool, moduleRoot common.Hash, ) (bool, error) { - entry, err := v.CreateReadyValidationEntry(ctx, header) - if err != nil { - return false, err - } - expEnd, err := entry.expectedEnd() + entry, err := v.CreateReadyValidationEntry(ctx, pos) if err != nil { return false, err } @@ -591,17 +408,13 @@ func (v *StatelessBlockValidator) ValidateBlock( }() for _, run := range runs { gsEnd, err := run.Await(ctx) - if err != nil || gsEnd != expEnd { + if err != nil || gsEnd != entry.End { return false, err } } return true, nil } -func (v *StatelessBlockValidator) RecordDBReferenceCount() int64 { - return v.recordingDatabase.ReferenceCount() -} - func (v *StatelessBlockValidator) Start(ctx_in context.Context) error { err := v.execSpawner.Start(ctx_in) if err != nil { diff --git a/system_tests/block_validator_test.go b/system_tests/block_validator_test.go index 97e8beef05..5a02c5873d 100644 --- a/system_tests/block_validator_test.go +++ b/system_tests/block_validator_test.go @@ -17,6 +17,7 @@ import ( "github.com/ethereum/go-ethereum/core/types" "github.com/offchainlabs/nitro/arbnode" "github.com/offchainlabs/nitro/arbos/l2pricing" + "github.com/offchainlabs/nitro/arbutil" ) func testBlockValidatorSimple(t *testing.T, dasModeString string, simpletxloops int, expensiveTx bool, arbitrator bool) { @@ -123,10 +124,12 @@ func testBlockValidatorSimple(t *testing.T, dasModeString string, simpletxloops } t.Log("waiting for block: ", 
lastBlock.NumberU64()) timeout := getDeadlineTimeout(t, time.Minute*10) - if !nodeB.BlockValidator.WaitForBlock(ctx, lastBlock.NumberU64(), timeout) { + // messageindex is same as block number here + if !nodeB.BlockValidator.WaitForPos(t, ctx, arbutil.MessageIndex(lastBlock.NumberU64()), timeout) { Fail(t, "did not validate all blocks") } - finalRefCount := nodeB.BlockValidator.RecordDBReferenceCount() + nodeB.Execution.Recorder.TrimAllPrepared(t) + finalRefCount := nodeB.Execution.Recorder.RecordingDBReferenceCount() lastBlockNow, err := l2clientB.BlockByNumber(ctx, nil) Require(t, err) // up to 3 extra references: awaiting validation, recently valid, lastValidatedHeader diff --git a/system_tests/full_challenge_impl_test.go b/system_tests/full_challenge_impl_test.go index d19daaf2a7..4aa83f8081 100644 --- a/system_tests/full_challenge_impl_test.go +++ b/system_tests/full_challenge_impl_test.go @@ -337,7 +337,7 @@ func RunChallengeTest(t *testing.T, asserterIsCorrect bool) { confirmLatestBlock(ctx, t, l1Info, l1Backend) - asserterValidator, err := staker.NewStatelessBlockValidator(asserterL2.InboxReader, asserterL2.InboxTracker, asserterL2.TxStreamer, asserterL2Blockchain, asserterL2ChainDb, asserterL2ArbDb, nil, &conf.BlockValidator) + asserterValidator, err := staker.NewStatelessBlockValidator(asserterL2.InboxReader, asserterL2.InboxTracker, asserterL2.TxStreamer, asserterL2.Execution.Recorder, asserterL2ArbDb, nil, &conf.BlockValidator) if err != nil { Fail(t, err) } @@ -346,11 +346,11 @@ func RunChallengeTest(t *testing.T, asserterIsCorrect bool) { Fail(t, err) } defer asserterValidator.Stop() - asserterManager, err := staker.NewChallengeManager(ctx, l1Backend, &asserterTxOpts, asserterTxOpts.From, challengeManagerAddr, 1, asserterL2Blockchain, asserterL2.InboxTracker, asserterValidator, 0, 0) + asserterManager, err := staker.NewChallengeManager(ctx, l1Backend, &asserterTxOpts, asserterTxOpts.From, challengeManagerAddr, 1, asserterValidator, 0, 0) if err != nil { Fail(t, err) } - challengerValidator, err := staker.NewStatelessBlockValidator(challengerL2.InboxReader, challengerL2.InboxTracker, challengerL2.TxStreamer, challengerL2Blockchain, challengerL2ChainDb, challengerL2ArbDb, nil, &conf.BlockValidator) + challengerValidator, err := staker.NewStatelessBlockValidator(challengerL2.InboxReader, challengerL2.InboxTracker, challengerL2.TxStreamer, challengerL2.Execution.Recorder, challengerL2ArbDb, nil, &conf.BlockValidator) if err != nil { Fail(t, err) } @@ -359,7 +359,7 @@ func RunChallengeTest(t *testing.T, asserterIsCorrect bool) { Fail(t, err) } defer challengerValidator.Stop() - challengerManager, err := staker.NewChallengeManager(ctx, l1Backend, &challengerTxOpts, challengerTxOpts.From, challengeManagerAddr, 1, challengerL2Blockchain, challengerL2.InboxTracker, challengerValidator, 0, 0) + challengerManager, err := staker.NewChallengeManager(ctx, l1Backend, &challengerTxOpts, challengerTxOpts.From, challengeManagerAddr, 1, challengerValidator, 0, 0) if err != nil { Fail(t, err) } diff --git a/system_tests/seqinbox_test.go b/system_tests/seqinbox_test.go index 9cef672434..baaa01a50b 100644 --- a/system_tests/seqinbox_test.go +++ b/system_tests/seqinbox_test.go @@ -269,11 +269,13 @@ func testSequencerInboxReaderImpl(t *testing.T, validator bool) { if validator && i%15 == 0 { for i := 0; ; i++ { - lastValidated := arbNode.BlockValidator.LastBlockValidated() - if lastValidated == expectedBlockNumber { + expectedPos, err := 
arbNode.Execution.ExecEngine.BlockNumberToMessageIndex(expectedBlockNumber)
+				Require(t, err)
+				lastValidated := arbNode.BlockValidator.Validated(t)
+				if lastValidated == expectedPos+1 {
 					break
 				} else if i >= 1000 {
-					Fail(t, "timed out waiting for block validator; have", lastValidated, "want", expectedBlockNumber)
+					Fail(t, "timed out waiting for block validator; have", lastValidated, "want", expectedPos+1)
 				}
 				time.Sleep(time.Second)
 			}
diff --git a/system_tests/staker_test.go b/system_tests/staker_test.go
index 4bd73fa9e9..e7f35e5701 100644
--- a/system_tests/staker_test.go
+++ b/system_tests/staker_test.go
@@ -135,8 +135,7 @@ func stakerTestImpl(t *testing.T, faultyStaker bool, honestStakerInactive bool)
 		l2nodeA.InboxReader,
 		l2nodeA.InboxTracker,
 		l2nodeA.TxStreamer,
-		l2nodeA.Execution.ArbInterface.BlockChain(),
-		l2nodeA.Execution.ChainDB,
+		l2nodeA.Execution.Recorder,
 		l2nodeA.ArbDB,
 		nil,
 		&blockValidatorConfig,
@@ -162,8 +161,7 @@ func stakerTestImpl(t *testing.T, faultyStaker bool, honestStakerInactive bool)
 		l2nodeB.InboxReader,
 		l2nodeB.InboxTracker,
 		l2nodeB.TxStreamer,
-		l2nodeB.Execution.ArbInterface.BlockChain(),
-		l2nodeB.Execution.ChainDB,
+		l2nodeB.Execution.Recorder,
 		l2nodeB.ArbDB,
 		nil,
 		&staker.DefaultBlockValidatorConfig,
diff --git a/system_tests/twonodeslong_test.go b/system_tests/twonodeslong_test.go
index 11359f3548..0c11c1fcb5 100644
--- a/system_tests/twonodeslong_test.go
+++ b/system_tests/twonodeslong_test.go
@@ -15,6 +15,7 @@ import (
 	"time"
 
 	"github.com/offchainlabs/nitro/arbos/l2pricing"
+	"github.com/offchainlabs/nitro/arbutil"
 
 	"github.com/ethereum/go-ethereum/core/types"
 )
@@ -173,7 +174,7 @@ func testTwoNodesLong(t *testing.T, dasModeStr string) {
 	lastBlockHeader, err := l2clientB.HeaderByNumber(ctx, nil)
 	Require(t, err)
 	timeout := getDeadlineTimeout(t, time.Minute*30)
-	if !nodeB.BlockValidator.WaitForBlock(ctx, lastBlockHeader.Number.Uint64(), timeout) {
+	if !nodeB.BlockValidator.WaitForPos(t, ctx, arbutil.MessageIndex(lastBlockHeader.Number.Uint64()), timeout) {
 		Fail(t, "did not validate all blocks")
 	}
 }

From 48afd4b825b52bb7ac28e46c403b4a17564a1a43 Mon Sep 17 00:00:00 2001
From: Tsahi Zidenberg
Date: Mon, 13 Feb 2023 18:06:46 -0700
Subject: [PATCH 04/63] tracker: add information for AccumulatorNotFound

---
 arbnode/inbox_tracker.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arbnode/inbox_tracker.go b/arbnode/inbox_tracker.go
index 980f4e97e8..05618faa8c 100644
--- a/arbnode/inbox_tracker.go
+++ b/arbnode/inbox_tracker.go
@@ -102,7 +102,7 @@ func (t *InboxTracker) GetDelayedAcc(seqNum uint64) (common.Hash, error) {
 			return common.Hash{}, err
 		}
 		if !hasKey {
-			return common.Hash{}, AccumulatorNotFoundErr
+			return common.Hash{}, fmt.Errorf("%w: not found delayed %d", AccumulatorNotFoundErr, seqNum)
 		}
 	}
 	data, err := t.db.Get(key)
@@ -150,7 +150,7 @@ func (t *InboxTracker) GetBatchMetadata(seqNum uint64) (BatchMetadata, error) {
 		return BatchMetadata{}, err
 	}
 	if !hasKey {
-		return BatchMetadata{}, AccumulatorNotFoundErr
+		return BatchMetadata{}, fmt.Errorf("%w: no metadata for batch %d", AccumulatorNotFoundErr, seqNum)
 	}
 	data, err := t.db.Get(key)
 	if err != nil {

From fb465a101c15e5328647795da3fa737e611a17cf Mon Sep 17 00:00:00 2001
From: Tsahi Zidenberg
Date: Mon, 13 Feb 2023 18:55:35 -0700
Subject: [PATCH 05/63] notify block recorder of reorgs

---
 arbnode/execution/block_recorder.go  | 10 ++++++----
 arbnode/execution/executionengine.go | 18 ++++++++++++++++--
 2 files changed, 22 insertions(+), 6 deletions(-)

diff --git 
a/arbnode/execution/block_recorder.go b/arbnode/execution/block_recorder.go index b1f9997279..735dd89de0 100644 --- a/arbnode/execution/block_recorder.go +++ b/arbnode/execution/block_recorder.go @@ -47,10 +47,12 @@ type RecordResult struct { } func NewBlockRecorder(execEngine *ExecutionEngine, ethDb ethdb.Database) *BlockRecorder { - return &BlockRecorder{ + recorder := &BlockRecorder{ execEngine: execEngine, recordingDatabase: arbitrum.NewRecordingDatabase(ethDb, execEngine.bc), } + execEngine.SetRecorder(recorder) + return recorder } func stateLogFunc(targetHeader, header *types.Header, hasState bool) { @@ -319,18 +321,18 @@ func (r *BlockRecorder) PrepareForRecord(ctx context.Context, start, end arbutil func (r *BlockRecorder) ReorgTo(hdr *types.Header) { r.validHdrLock.Lock() - if r.validHdr.Number.Cmp(hdr.Number) > 0 { + if r.validHdr != nil && r.validHdr.Number.Cmp(hdr.Number) > 0 { log.Error("block recorder: reorging past previously-marked final block", "reorg target num", hdr.Number, "hash", hdr.Hash(), "reorged past num", r.validHdr.Number, "hash", r.validHdr.Hash()) r.recordingDatabase.Dereference(r.validHdr) r.validHdr = nil } - if r.validHdrCandidate.Number.Cmp(hdr.Number) > 0 { + if r.validHdrCandidate != nil && r.validHdrCandidate.Number.Cmp(hdr.Number) > 0 { r.recordingDatabase.Dereference(r.validHdrCandidate) r.validHdrCandidate = nil } r.validHdrLock.Unlock() r.lastHdrLock.Lock() - if r.lastHdr.Number.Cmp(hdr.Number) > 0 { + if r.lastHdr != nil && r.lastHdr.Number.Cmp(hdr.Number) > 0 { r.recordingDatabase.Dereference(r.lastHdr) r.lastHdr = nil } diff --git a/arbnode/execution/executionengine.go b/arbnode/execution/executionengine.go index 842770dbf5..590b449687 100644 --- a/arbnode/execution/executionengine.go +++ b/arbnode/execution/executionengine.go @@ -35,6 +35,7 @@ type ExecutionEngine struct { bc *core.BlockChain streamer TransactionStreamerInterface + recorder *BlockRecorder resequenceChan chan []*arbostypes.MessageWithMetadata createBlocksMutex sync.Mutex @@ -66,12 +67,22 @@ func (s *ExecutionEngine) SetReorgSequencingPolicy(reorgSequencing func() *arbos s.reorgSequencing = reorgSequencing } +func (s *ExecutionEngine) SetRecorder(recorder *BlockRecorder) { + if s.Started() { + panic("trying to set recorder after start") + } + if s.reorgSequencing != nil { + panic("trying to set recorder policy when already set") + } + s.recorder = recorder +} + func (s *ExecutionEngine) SetTransactionStreamer(streamer TransactionStreamerInterface) { if s.Started() { - panic("trying to set reorg sequencing policy after start") + panic("trying to set transaction streamer after start") } if s.streamer != nil { - panic("trying to set reorg sequencing policy when already set") + panic("trying to set transaction streamer when already set") } s.streamer = streamer } @@ -105,6 +116,9 @@ func (s *ExecutionEngine) Reorg(count arbutil.MessageIndex, newMessages []arbost return err } } + if s.recorder != nil { + s.recorder.ReorgTo(targetBlock.Header()) + } s.resequenceChan <- oldMessages successful = true return nil From 03afe10c07be0431f6dfe8dc4c8b8a92bf3ffe87 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Wed, 15 Feb 2023 11:24:14 -0700 Subject: [PATCH 06/63] validateResult api fixes --- arbnode/api.go | 13 +++++++++---- staker/stateless_block_validator.go | 14 +++++++------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/arbnode/api.go b/arbnode/api.go index 27ddc45f20..6c067329e6 100644 --- a/arbnode/api.go +++ b/arbnode/api.go @@ -10,6 +10,7 @@ import ( 
"github.com/ethereum/go-ethereum/core" "github.com/offchainlabs/nitro/arbutil" "github.com/offchainlabs/nitro/staker" + "github.com/offchainlabs/nitro/validator" "github.com/pkg/errors" ) @@ -27,8 +28,9 @@ type BlockValidatorDebugAPI struct { } type ValidateBlockResult struct { - Valid bool `json:"valid"` - Latency string `json:"latency"` + Valid bool `json:"valid"` + Latency string `json:"latency"` + GlobalState validator.GoGlobalState `json:"globalstate"` } func (a *BlockValidatorDebugAPI) ValidateMessageNumber( @@ -47,8 +49,11 @@ func (a *BlockValidatorDebugAPI) ValidateMessageNumber( moduleRoot = moduleRoots[0] } start_time := time.Now() - valid, err := a.val.ValidateBlock(ctx, arbutil.MessageIndex(msgNum), full, moduleRoot) - result.Valid = valid + valid, gs, err := a.val.ValidateResult(ctx, arbutil.MessageIndex(msgNum), full, moduleRoot) result.Latency = fmt.Sprintf("%vms", time.Since(start_time).Milliseconds()) + if gs != nil { + result.GlobalState = *gs + } + result.Valid = valid return result, err } diff --git a/staker/stateless_block_validator.go b/staker/stateless_block_validator.go index e45e9100f8..ae08b207c2 100644 --- a/staker/stateless_block_validator.go +++ b/staker/stateless_block_validator.go @@ -376,16 +376,16 @@ func (v *StatelessBlockValidator) CreateReadyValidationEntry(ctx context.Context return entry, nil } -func (v *StatelessBlockValidator) ValidateBlock( +func (v *StatelessBlockValidator) ValidateResult( ctx context.Context, pos arbutil.MessageIndex, useExec bool, moduleRoot common.Hash, -) (bool, error) { +) (bool, *validator.GoGlobalState, error) { entry, err := v.CreateReadyValidationEntry(ctx, pos) if err != nil { - return false, err + return false, nil, err } input, err := entry.ToInput() if err != nil { - return false, err + return false, nil, err } var spawners []validator.ValidationSpawner if useExec { @@ -394,7 +394,7 @@ func (v *StatelessBlockValidator) ValidateBlock( spawners = v.validationSpawners } if len(spawners) == 0 { - return false, errors.New("no validation defined") + return false, &entry.End, errors.New("no validation defined") } var runs []validator.ValidationRun for _, spawner := range spawners { @@ -409,10 +409,10 @@ func (v *StatelessBlockValidator) ValidateBlock( for _, run := range runs { gsEnd, err := run.Await(ctx) if err != nil || gsEnd != entry.End { - return false, err + return false, &gsEnd, err } } - return true, nil + return true, &entry.End, nil } func (v *StatelessBlockValidator) Start(ctx_in context.Context) error { From 93da3b6a0c2fc4f1cca019d7a1332a28004f69e1 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Wed, 15 Feb 2023 11:34:36 -0700 Subject: [PATCH 07/63] arbnode: small fixes --- arbnode/execution/block_recorder.go | 21 +++++++++++---------- arbnode/execution/executionengine.go | 16 ++++++++-------- arbnode/transaction_streamer.go | 2 -- 3 files changed, 19 insertions(+), 20 deletions(-) diff --git a/arbnode/execution/block_recorder.go b/arbnode/execution/block_recorder.go index 735dd89de0..5152f33ebb 100644 --- a/arbnode/execution/block_recorder.go +++ b/arbnode/execution/block_recorder.go @@ -210,6 +210,9 @@ func (r *BlockRecorder) updateValidCandidateHdr(hdr *types.Header) { log.Warn("failed to get state in updateLastHdr", "err", err) return } + if r.validHdrCandidate != nil { + r.recordingDatabase.Dereference(r.validHdrCandidate) + } r.validHdrCandidate = hdr } @@ -227,7 +230,6 @@ func (r *BlockRecorder) MarkValid(pos arbutil.MessageIndex, resultHash common.Ha canonicalResultHash := 
r.execEngine.bc.GetCanonicalHash(uint64(validNum)) if canonicalResultHash != resultHash { log.Warn("markvalid hash not canonical", "pos", pos, "result", resultHash, "canonical", canonicalResultHash) - r.validHdrCandidate = nil return } // make sure the candidate is still canonical @@ -235,6 +237,7 @@ func (r *BlockRecorder) MarkValid(pos arbutil.MessageIndex, resultHash common.Ha candidateHash := r.validHdrCandidate.Hash() if canonicalHash != candidateHash { log.Error("vlid candidate hash not canonical", "number", r.validHdrCandidate.Number, "candidate", candidateHash, "canonical", canonicalHash) + r.recordingDatabase.Dereference(r.validHdrCandidate) r.validHdrCandidate = nil return } @@ -289,7 +292,7 @@ func (r *BlockRecorder) PrepareForRecord(ctx context.Context, start, end arbutil if end < start { return fmt.Errorf("illegal range start %d > end %d", start, end) } - numOfBlocks := uint64(end - start) + numOfBlocks := uint64(end + 1 - start) hdrNum := r.execEngine.MessageIndexToBlockNumber(start) if start > 0 { hdrNum-- // need to get previous @@ -297,24 +300,22 @@ func (r *BlockRecorder) PrepareForRecord(ctx context.Context, start, end arbutil numOfBlocks-- // genesis block doesn't need preparation, so recording one less block } lastHdrNum := hdrNum + numOfBlocks - var header *types.Header - for hdrNum < lastHdrNum { - newHeader := r.execEngine.bc.GetHeaderByNumber(uint64(hdrNum)) - if newHeader == nil { + for hdrNum <= lastHdrNum { + header := r.execEngine.bc.GetHeaderByNumber(uint64(hdrNum)) + if header == nil { log.Warn("prepareblocks asked for non-found block", "hdrNum", hdrNum) break } - _, err := r.recordingDatabase.GetOrRecreateState(ctx, newHeader, stateLogFunc) + _, err := r.recordingDatabase.GetOrRecreateState(ctx, header, stateLogFunc) if err != nil { log.Warn("prepareblocks failed to get state for block", "hdrNum", hdrNum, "err", err) break } - header = newHeader references = append(references, header) r.updateValidCandidateHdr(header) + r.updateLastHdr(header) hdrNum++ } - r.updateLastHdr(header) r.preparedAddTrim(references, 1000) return nil } @@ -322,7 +323,7 @@ func (r *BlockRecorder) PrepareForRecord(ctx context.Context, start, end arbutil func (r *BlockRecorder) ReorgTo(hdr *types.Header) { r.validHdrLock.Lock() if r.validHdr != nil && r.validHdr.Number.Cmp(hdr.Number) > 0 { - log.Error("block recorder: reorging past previously-marked final block", "reorg target num", hdr.Number, "hash", hdr.Hash(), "reorged past num", r.validHdr.Number, "hash", r.validHdr.Hash()) + log.Warn("block recorder: reorging past previously-marked valid block", "reorg target num", hdr.Number, "hash", hdr.Hash(), "reorged past num", r.validHdr.Number, "hash", r.validHdr.Hash()) r.recordingDatabase.Dereference(r.validHdr) r.validHdr = nil } diff --git a/arbnode/execution/executionengine.go b/arbnode/execution/executionengine.go index 590b449687..24266f8c7e 100644 --- a/arbnode/execution/executionengine.go +++ b/arbnode/execution/executionengine.go @@ -57,24 +57,24 @@ func NewExecutionEngine(bc *core.BlockChain) (*ExecutionEngine, error) { }, nil } -func (s *ExecutionEngine) SetReorgSequencingPolicy(reorgSequencing func() *arbos.SequencingHooks) { +func (s *ExecutionEngine) SetRecorder(recorder *BlockRecorder) { if s.Started() { - panic("trying to set reorg sequencing policy after start") + panic("trying to set recorder after start") } if s.reorgSequencing != nil { - panic("trying to set reorg sequencing policy when already set") + panic("trying to set recorder policy when already set") } - 
s.reorgSequencing = reorgSequencing + s.recorder = recorder } -func (s *ExecutionEngine) SetRecorder(recorder *BlockRecorder) { +func (s *ExecutionEngine) SetReorgSequencingPolicy(reorgSequencing func() *arbos.SequencingHooks) { if s.Started() { - panic("trying to set recorder after start") + panic("trying to set reorg sequencing policy after start") } if s.reorgSequencing != nil { - panic("trying to set recorder policy when already set") + panic("trying to set reorg sequencing policy when already set") } - s.recorder = recorder + s.reorgSequencing = reorgSequencing } func (s *ExecutionEngine) SetTransactionStreamer(streamer TransactionStreamerInterface) { diff --git a/arbnode/transaction_streamer.go b/arbnode/transaction_streamer.go index 3853204f83..677844203f 100644 --- a/arbnode/transaction_streamer.go +++ b/arbnode/transaction_streamer.go @@ -887,7 +887,6 @@ func (s *TransactionStreamer) feedNextMsg(ctx context.Context, exec *execution.E log.Info("feedOneMsg failed to send message to execEngine", "err", err, "pos", pos) return false } - return pos+1 < msgCount } @@ -906,7 +905,6 @@ func (s *TransactionStreamer) Start(ctxIn context.Context) { timer.Stop() case <-timer.C: } - } }) } From 7b44435a6042e0b9f765275ed54b4a37ca9f298f Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Wed, 15 Feb 2023 19:02:08 -0700 Subject: [PATCH 08/63] block_validator sorting and fixes --- staker/block_validator.go | 245 +++++++++++++++++++++----------------- 1 file changed, 134 insertions(+), 111 deletions(-) diff --git a/staker/block_validator.go b/staker/block_validator.go index d389d2bc35..c6e616dc88 100644 --- a/staker/block_validator.go +++ b/staker/block_validator.go @@ -27,27 +27,32 @@ type BlockValidator struct { stopwaiter.StopWaiter *StatelessBlockValidator - validations containers.SyncMap[arbutil.MessageIndex, *validationStatus] - reorgMutex sync.RWMutex + reorgMutex sync.RWMutex - lastValidGS validator.GoGlobalState - // validInfoPrintTime time.Time TODO: print validated once in a while.. chainCaughtUp bool - lastCreateBatch []byte - lastCreateBatchMsgCount arbutil.MessageIndex - lastCreateGS validator.GoGlobalState - lastCreateDelayed uint64 + // can only be accessed from creation thread or if holding reorg-write + nextCreateBatch []byte + nextCreateBatchMsgCount arbutil.MessageIndex + nextCreateBatchReread bool + nextCreateStartGS validator.GoGlobalState + nextCreatePrevDelayed uint64 + + // only used by record loop or holding reorg-write + prepared arbutil.MessageIndex + nextRecordPrepared *containers.Promise[arbutil.MessageIndex] + + // can only be accessed from from validation thread or if holding reorg-write + lastValidGS validator.GoGlobalState + valLoopPos arbutil.MessageIndex + // validInfoPrintTime time.Time TODO: print validated once in a while.. 
+ // can be read by anyone holding reorg-read + // written by appropriate thread or reorg-write createdA uint64 recordSentA uint64 validatedA uint64 - - // only used by record loop and reorg, not atomic - recordPrepardPos arbutil.MessageIndex - nextRecordPrepared *containers.Promise[arbutil.MessageIndex] - // only used by validation loop and reorg, not atomic - valLoopPos arbutil.MessageIndex + validations containers.SyncMap[arbutil.MessageIndex, *validationStatus] config BlockValidatorConfigFetcher @@ -55,6 +60,7 @@ type BlockValidator struct { sendRecordChan chan struct{} progressValidationsChan chan struct{} + // for testing only testingProgressMadeChan chan struct{} fatalErr chan<- error @@ -160,7 +166,7 @@ func NewBlockValidator( config BlockValidatorConfigFetcher, fatalErr chan<- error, ) (*BlockValidator, error) { - validator := &BlockValidator{ + ret := &BlockValidator{ StatelessBlockValidator: statelessBlockValidator, createNodesChan: make(chan struct{}, 1), sendRecordChan: make(chan struct{}, 1), @@ -169,17 +175,30 @@ func NewBlockValidator( fatalErr: fatalErr, } if !config().Dangerous.ResetBlockValidation { - validated, err := validator.ReadLastValidatedInfo() + validated, err := ret.ReadLastValidatedInfo() if err != nil { return nil, err } if validated != nil { - validator.lastValidGS = validated.GlobalState + ret.lastValidGS = validated.GlobalState } } - streamer.SetBlockValidator(validator) - inbox.SetBlockValidator(validator) - return validator, nil + // genesis block is impossible to validate unless genesis state is empty + if ret.lastValidGS.Batch == 0 { + genesis, err := streamer.ResultAtCount(1) + if err != nil { + return nil, err + } + ret.lastValidGS = validator.GoGlobalState{ + BlockHash: genesis.BlockHash, + SendRoot: genesis.SendRoot, + Batch: 1, + PosInBatch: 0, + } + } + streamer.SetBlockValidator(ret) + inbox.SetBlockValidator(ret) + return ret, nil } func atomicStorePos(addr *uint64, val arbutil.MessageIndex) { @@ -269,13 +288,16 @@ func GlobalStateToMsgCount(tracker InboxTrackerInterface, streamer TransactionSt return false, 0, err } } - count := prevBatchMsgCount + arbutil.MessageIndex(gs.PosInBatch) - curBatchMsgCount, err := tracker.GetBatchMessageCount(gs.Batch) - if err != nil { - return false, 0, fmt.Errorf("%w: getBatchMsgCount %d batchCount %d", err, gs.Batch, batchCount) - } - if curBatchMsgCount < count { - return false, 0, fmt.Errorf("%w: batch %d posInBatch %d, maxPosInBatch %d", ErrGlobalStateNotInChain, gs.Batch, gs.PosInBatch, curBatchMsgCount-prevBatchMsgCount) + count := prevBatchMsgCount + if gs.PosInBatch > 0 { + curBatchMsgCount, err := tracker.GetBatchMessageCount(gs.Batch) + if err != nil { + return false, 0, fmt.Errorf("%w: getBatchMsgCount %d batchCount %d", err, gs.Batch, batchCount) + } + count += arbutil.MessageIndex(gs.PosInBatch) + if curBatchMsgCount < count { + return false, 0, fmt.Errorf("%w: batch %d posInBatch %d, maxPosInBatch %d", ErrGlobalStateNotInChain, gs.Batch, gs.PosInBatch, curBatchMsgCount-prevBatchMsgCount) + } } processed, err := streamer.GetProcessedMessageCount() if err != nil { @@ -294,39 +316,34 @@ func GlobalStateToMsgCount(tracker InboxTrackerInterface, streamer TransactionSt return true, count, nil } -func (v *BlockValidator) checkValidatedGSCaughUp(ctx context.Context) error { - if v.chainCaughtUp { - return nil - } +func (v *BlockValidator) checkValidatedGSCaughUp(ctx context.Context) (bool, error) { v.reorgMutex.Lock() defer v.reorgMutex.Unlock() - var count arbutil.MessageIndex - if v.lastValidGS.Batch > 
0 { - var caughtUp bool - var err error - caughtUp, count, err = GlobalStateToMsgCount(v.inboxTracker, v.streamer, v.lastValidGS) - if err != nil { - return err - } - if !caughtUp { - return nil - } + if v.chainCaughtUp { + return true, nil } - if v.lastValidGS.PosInBatch != 0 { - found, err := v.readLastCreatedBatch(ctx, v.lastValidGS.Batch) - if err != nil { - return err - } - if !found { - return fmt.Errorf("couldn't find batch %d though caught up", v.lastValidGS.Batch) - } + if v.lastValidGS.Batch == 0 { + return false, errors.New("lastValid not initialized. cannot validate genesis") + } + caughtUp, count, err := GlobalStateToMsgCount(v.inboxTracker, v.streamer, v.lastValidGS) + if err != nil { + return false, err + } + if !caughtUp { + return false, nil + } + msg, err := v.streamer.GetMessage(count - 1) + if err != nil { + return false, err } - v.lastCreateGS = v.lastValidGS + v.nextCreateBatchReread = true + v.nextCreateStartGS = v.lastValidGS + v.nextCreatePrevDelayed = msg.DelayedMessagesRead atomicStorePos(&v.createdA, count) atomicStorePos(&v.recordSentA, count) atomicStorePos(&v.validatedA, count) v.chainCaughtUp = true - return nil + return true, nil } func (v *BlockValidator) sendRecord(s *validationStatus) error { @@ -369,7 +386,7 @@ func (v *BlockValidator) SetCurrentWasmModuleRoot(hash common.Hash) error { defer v.moduleMutex.Unlock() if (hash == common.Hash{}) { - return errors.New("trying to set zero as wsmModuleRoot") + return errors.New("trying to set zero as wasmModuleRoot") } if hash == v.currentWasmModuleRoot { return nil @@ -392,25 +409,23 @@ func (v *BlockValidator) SetCurrentWasmModuleRoot(hash common.Hash) error { ) } -func (v *BlockValidator) readLastCreatedBatch(ctx context.Context, batchNum uint64) (bool, error) { +func (v *BlockValidator) readBatch(ctx context.Context, batchNum uint64) (bool, []byte, arbutil.MessageIndex, error) { batchCount, err := v.inboxTracker.GetBatchCount() if err != nil { - return false, err + return false, nil, 0, err } if batchCount < batchNum { - return false, nil + return false, nil, 0, nil } - batchMsgCount, err := v.inboxTracker.GetBatchMessageCount(v.lastCreateGS.Batch) + batchMsgCount, err := v.inboxTracker.GetBatchMessageCount(batchNum) if err != nil { - return false, err + return false, nil, 0, err } batch, err := v.inboxReader.GetSequencerMessageBytes(ctx, batchNum) if err != nil { - return false, err + return false, nil, 0, err } - v.lastCreateBatch = batch - v.lastCreateBatchMsgCount = batchMsgCount - return true, nil + return true, batch, batchMsgCount, nil } func (v *BlockValidator) createNextValidationEntry(ctx context.Context) (bool, error) { @@ -431,30 +446,34 @@ func (v *BlockValidator) createNextValidationEntry(ctx context.Context) (bool, e if err != nil { return false, err } - res, err := v.streamer.ResultAtCount(pos + 1) + endRes, err := v.streamer.ResultAtCount(pos + 1) if err != nil { return false, err } - if v.lastCreateGS.PosInBatch == 0 { + if v.nextCreateStartGS.PosInBatch == 0 || v.nextCreateBatchReread { // new batch - found, err := v.readLastCreatedBatch(ctx, v.lastCreateGS.Batch) + found, batch, count, err := v.readBatch(ctx, v.nextCreateStartGS.Batch) if !found { return false, err } + v.nextCreateBatch = batch + v.nextCreateBatchMsgCount = count + v.nextCreateBatchReread = false } endGS := validator.GoGlobalState{ - BlockHash: res.BlockHash, - SendRoot: res.SendRoot, - Batch: v.lastCreateGS.Batch, - PosInBatch: v.lastCreateGS.PosInBatch + 1, - } - if pos == v.lastCreateBatchMsgCount { - endGS.Batch++ + 
BlockHash: endRes.BlockHash, + SendRoot: endRes.SendRoot, + } + if pos < v.nextCreateBatchMsgCount { + endGS.Batch = v.nextCreateStartGS.Batch + endGS.PosInBatch = v.nextCreateStartGS.PosInBatch + 1 + } else if pos == v.nextCreateBatchMsgCount { + endGS.Batch = v.nextCreateStartGS.Batch + 1 endGS.PosInBatch = 0 - } else if pos > v.lastCreateBatchMsgCount { - return false, fmt.Errorf("illegal batch msg count %d pos %d batch %d", v.lastCreateBatchMsgCount, pos, endGS.Batch) + } else { + return false, fmt.Errorf("illegal batch msg count %d pos %d batch %d", v.nextCreateBatchMsgCount, pos, endGS.Batch) } - entry, err := newValidationEntry(pos, v.lastCreateGS, endGS, msg, v.lastCreateBatch, v.lastCreateDelayed) + entry, err := newValidationEntry(pos, v.nextCreateStartGS, endGS, msg, v.nextCreateBatch, v.nextCreatePrevDelayed) if err != nil { return false, err } @@ -463,9 +482,9 @@ func (v *BlockValidator) createNextValidationEntry(ctx context.Context) (bool, e Entry: entry, } v.validations.Store(pos, status) - v.lastCreateGS = endGS + v.nextCreateStartGS = endGS + v.nextCreatePrevDelayed = msg.DelayedMessagesRead atomicStorePos(&v.createdA, pos+1) - v.lastCreateDelayed = msg.DelayedMessagesRead return true, nil } @@ -488,29 +507,29 @@ func (v *BlockValidator) sendNextRecordPrepare() error { if err != nil { return err } - if prepared > v.recordPrepardPos { - v.recordPrepardPos = prepared + if prepared > v.prepared { + v.prepared = prepared } v.nextRecordPrepared = nil } else { return nil } } - prepareCount := v.validated() + arbutil.MessageIndex(v.config().PrerecordedBlocks) + nextPrepared := v.validated() + arbutil.MessageIndex(v.config().PrerecordedBlocks) created := v.created() - if prepareCount > created { - prepareCount = created + if nextPrepared > created { + nextPrepared = created } - if v.recordPrepardPos+2 > prepareCount { + if v.prepared >= nextPrepared { return nil } nextPromise := containers.NewPromise[arbutil.MessageIndex]() v.LaunchThread(func(ctx context.Context) { - err := v.recorder.PrepareForRecord(ctx, v.recordPrepardPos+1, prepareCount-1) + err := v.recorder.PrepareForRecord(ctx, v.prepared, nextPrepared-1) if err != nil { nextPromise.ProduceError(err) } else { - nextPromise.Produce(prepareCount - 1) + nextPromise.Produce(nextPrepared) nonBlockingTriger(v.sendRecordChan) } }) @@ -526,7 +545,7 @@ func (v *BlockValidator) sendNextRecordRequest(ctx context.Context) (bool, error return false, err } pos := v.recordSent() - if pos > v.recordPrepardPos { + if pos >= v.prepared { return false, nil } validationStatus, found := v.validations.Load(pos) @@ -583,7 +602,7 @@ validatiosLoop: v.reorgMutex.RUnlock() v.reorgMutex.RLock() pos = v.valLoopPos - if pos > v.recordSent() { + if pos >= v.recordSent() { return nil, nil } validationStatus, found := v.validations.Load(pos) @@ -613,7 +632,7 @@ validatiosLoop: err = fmt.Errorf("validation failed: expected %v got %v", validationStatus.Entry.End, runEnd) writeErr := v.writeToFile(validationStatus.Entry, run.WasmModuleRoot()) if writeErr != nil { - log.Warn("failed to write debug results file", "err", err) + log.Warn("failed to write debug results file", "err", writeErr) } } if err != nil { @@ -621,21 +640,18 @@ validatiosLoop: return &pos, nil // if not fatal - retry } } - for _, run := range validationStatus.Runs { - run.Close() - } v.lastValidGS = validationStatus.Entry.End go v.recorder.MarkValid(pos, v.lastValidGS.BlockHash) + err := v.writeLastValidatedToDb(validationStatus.Entry.End, wasmRoots) + if err != nil { + log.Error("failed 
writing new validated to database", "pos", pos, "err", err) + } atomicStorePos(&v.validatedA, pos+1) nonBlockingTriger(v.createNodesChan) nonBlockingTriger(v.sendRecordChan) if v.testingProgressMadeChan != nil { nonBlockingTriger(v.testingProgressMadeChan) } - err := v.writeLastValidatedToDb(validationStatus.Entry.End, wasmRoots) - if err != nil { - log.Error("failed writing new validated to database", "pos", pos, "err", err) - } continue } if room == 0 { @@ -644,7 +660,7 @@ validatiosLoop: if currentStatus == Prepared { replaced := validationStatus.replaceStatus(Prepared, SendingValidation) if !replaced { - v.possiblyFatal(errors.New("failed to set status")) + v.possiblyFatal(errors.New("failed to set SendingValidation status")) } v.LaunchThread(func(ctx context.Context) { validationCtx, cancel := context.WithCancel(ctx) @@ -690,6 +706,7 @@ func (v *BlockValidator) iterativeValidationProgress(ctx context.Context, ignore err := v.Reorg(ctx, *reorg) if err != nil { log.Error("error trying to rorg validation", "pos", *reorg-1, "err", err) + v.possiblyFatal(err) } } return v.config().ValidationPoll @@ -732,14 +749,19 @@ func (v *BlockValidator) AssumeValid(globalState validator.GoGlobalState) error // Because batches and blocks are handled at separate layers in the node, // and because block generation from messages is asynchronous, -// this call is different than ReorgToBlock, which is currently called later. +// this call is different than Reorg, which is currently called later. func (v *BlockValidator) ReorgToBatchCount(count uint64) { - // a normal reorg will be called if necessary - do nothing + v.reorgMutex.Lock() + defer v.reorgMutex.Unlock() + if v.nextCreateStartGS.Batch >= count { + v.nextCreateBatchReread = true + } } + func (v *BlockValidator) Reorg(ctx context.Context, count arbutil.MessageIndex) error { v.reorgMutex.Lock() defer v.reorgMutex.Unlock() - if count == 0 { + if count <= 1 { return errors.New("cannot reorg out genesis") } if !v.chainCaughtUp { @@ -758,14 +780,10 @@ func (v *BlockValidator) Reorg(ctx context.Context, count arbutil.MessageIndex) v.possiblyFatal(err) return err } - if endPosition.PosInBatch != 0 { - found, err := v.readLastCreatedBatch(ctx, endPosition.BatchNumber) - if err != nil { - return err - } - if !found { - return fmt.Errorf("couldn't find batch during reorg num %d", endPosition.BatchNumber) - } + msg, err := v.streamer.GetMessage(count - 1) + if err != nil { + v.possiblyFatal(err) + return err } for iPos := count; iPos < v.created(); iPos++ { status, found := v.validations.Load(iPos) @@ -774,7 +792,9 @@ func (v *BlockValidator) Reorg(ctx context.Context, count arbutil.MessageIndex) } v.validations.Delete(iPos) } - v.lastCreateGS = buildGlobalState(*res, endPosition) + v.nextCreateStartGS = buildGlobalState(*res, endPosition) + v.nextCreatePrevDelayed = msg.DelayedMessagesRead + v.nextCreateBatchReread = true countUint64 := uint64(count) v.createdA = countUint64 // under the reorg mutex we don't need atomic access @@ -783,12 +803,15 @@ func (v *BlockValidator) Reorg(ctx context.Context, count arbutil.MessageIndex) } if v.validatedA > countUint64 { v.validatedA = countUint64 - v.lastValidGS = v.lastCreateGS + v.lastValidGS = v.nextCreateStartGS err := v.writeLastValidatedToDb(v.lastValidGS, []common.Hash{}) // we don't know which wasm roots were validated if err != nil { log.Error("failed writing valid state after reorg", "err", err) } } + if v.prepared > count { + v.prepared = count + } nonBlockingTriger(v.createNodesChan) return nil } @@ -820,11 
+843,11 @@ func (v *BlockValidator) Initialize() error { func (v *BlockValidator) LaunchWorkthreadsWhenCaughtUp(ctx context.Context) { for { - err := v.checkValidatedGSCaughUp(ctx) + caughtUp, err := v.checkValidatedGSCaughUp(ctx) if err != nil { log.Error("validator got error waiting for chain to catch up", "err", err) } - if v.chainCaughtUp { + if caughtUp { break } select { From 7d2aeacd1c5fc703c75381ed8b2a6ec8db1f76ab Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Wed, 15 Feb 2023 19:29:41 -0700 Subject: [PATCH 09/63] validator prints logs --- staker/block_validator.go | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/staker/block_validator.go b/staker/block_validator.go index c6e616dc88..556d0530f1 100644 --- a/staker/block_validator.go +++ b/staker/block_validator.go @@ -43,9 +43,9 @@ type BlockValidator struct { nextRecordPrepared *containers.Promise[arbutil.MessageIndex] // can only be accessed from from validation thread or if holding reorg-write - lastValidGS validator.GoGlobalState - valLoopPos arbutil.MessageIndex - // validInfoPrintTime time.Time TODO: print validated once in a while.. + lastValidGS validator.GoGlobalState + valLoopPos arbutil.MessageIndex + validInfoPrintTime time.Time // can be read by anyone holding reorg-read // written by appropriate thread or reorg-write @@ -575,6 +575,15 @@ func (v *BlockValidator) iterativeValidationEntryRecorder(ctx context.Context, i return v.config().ValidationPoll } +func (v *BlockValidator) maybePrintNewlyValid() { + if time.Since(v.validInfoPrintTime) > time.Second { + log.Info("result validated", "count", v.validated(), "blockHash", v.lastValidGS.BlockHash) + v.validInfoPrintTime = time.Now() + } else { + log.Trace("result validated", "count", v.validated(), "blockHash", v.lastValidGS.BlockHash) + } +} + // return val: // *MessageIndex - pointer to bad entry if there is one (requires reorg) func (v *BlockValidator) advanceValidations(ctx context.Context) (*arbutil.MessageIndex, error) { @@ -652,6 +661,7 @@ validatiosLoop: if v.testingProgressMadeChan != nil { nonBlockingTriger(v.testingProgressMadeChan) } + v.maybePrintNewlyValid() continue } if room == 0 { From ed05af0ae90d81d02b1c5c62e5f165af55d4eff2 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Fri, 17 Feb 2023 18:23:19 -0700 Subject: [PATCH 10/63] staker: more fixes --- staker/block_challenge_backend.go | 10 +++---- staker/challenge_manager.go | 7 +++-- staker/l1_validator.go | 43 +++++++------------------------ 3 files changed, 18 insertions(+), 42 deletions(-) diff --git a/staker/block_challenge_backend.go b/staker/block_challenge_backend.go index e545efd23e..303ff50d8c 100644 --- a/staker/block_challenge_backend.go +++ b/staker/block_challenge_backend.go @@ -94,7 +94,7 @@ func (b *BlockChallengeBackend) findBatchFromMessageCount(msgCount arbutil.Messa if batchMsgCount < msgCount { low = mid + 1 } else if batchMsgCount == msgCount { - return mid + 1, nil + return mid, nil } else if mid == low { // batchMsgCount > msgCount return mid, nil } else { // batchMsgCount > msgCount @@ -108,13 +108,13 @@ func (b *BlockChallengeBackend) FindGlobalStateFromMessageCount(count arbutil.Me if err != nil { return validator.GoGlobalState{}, err } - var batchMsgCount arbutil.MessageIndex + var prevBatchMsgCount arbutil.MessageIndex if batch > 0 { - batchMsgCount, err = b.inboxTracker.GetBatchMessageCount(batch - 1) + prevBatchMsgCount, err = b.inboxTracker.GetBatchMessageCount(batch - 1) if err != nil { return validator.GoGlobalState{}, err } - 
if batchMsgCount > count { + if prevBatchMsgCount > count { return validator.GoGlobalState{}, errors.New("findBatchFromMessageCount returned bad batch") } } @@ -126,7 +126,7 @@ func (b *BlockChallengeBackend) FindGlobalStateFromMessageCount(count arbutil.Me BlockHash: res.BlockHash, SendRoot: res.SendRoot, Batch: batch, - PosInBatch: uint64(count - batchMsgCount), + PosInBatch: uint64(count - prevBatchMsgCount), }, nil } diff --git a/staker/challenge_manager.go b/staker/challenge_manager.go index ebfaa6e1ea..76e193d73f 100644 --- a/staker/challenge_manager.go +++ b/staker/challenge_manager.go @@ -449,9 +449,8 @@ func (m *ChallengeManager) IssueOneStepProof( ) } -// count is for the initial machine, which also means it's the position of the challenged machine +// count is for the initial machine, which also means it's the position of the message that's digested in the challenge func (m *ChallengeManager) createExecutionBackend(ctx context.Context, initialCount arbutil.MessageIndex, tooFar bool) error { - // Get the next message and block header, and record the full block creation if m.initialMachineMessageCount == initialCount && m.executionChallengeBackend != nil { return nil } @@ -541,11 +540,11 @@ func (m *ChallengeManager) Act(ctx context.Context) (*types.Transaction, error) return nil, fmt.Errorf("error getting execution challenge final state: %w", err) } if expectedStatus != computedStatus { - return nil, fmt.Errorf("after block %v expected status %v but got %v", initialCount, expectedStatus, computedStatus) + return nil, fmt.Errorf("after msg %d expected status %v but got %v", initialCount, expectedStatus, computedStatus) } if computedStatus == StatusFinished { if computedState != expectedState { - return nil, fmt.Errorf("after block %v expected global state %v but got %v", initialCount, expectedState, computedState) + return nil, fmt.Errorf("after msg %d expected global state %v but got %v", initialCount, expectedState, computedState) } } log.Info("issuing one step proof", "challenge", m.challengeIndex, "stepCount", stepCount, "initial count", initialCount) diff --git a/staker/l1_validator.go b/staker/l1_validator.go index ece27dceb1..e9029bf265 100644 --- a/staker/l1_validator.go +++ b/staker/l1_validator.go @@ -256,7 +256,7 @@ func (v *L1Validator) generateNodeAction(ctx context.Context, stakerInfo *OurSta } else { log.Info("catching up to chain blocks", "target", target, "current", current) } - return nil, false, err + return nil, false, nil } var validatedCount arbutil.MessageIndex @@ -272,7 +272,8 @@ func (v *L1Validator) generateNodeAction(ctx context.Context, stakerInfo *OurSta return nil, false, fmt.Errorf("%w: not found validated block in blockchain", err) } if !caughtUp { - log.Info("catching up to laste validated block", "target", valInfo.GlobalState) + log.Info("catching up to last validated block", "target", valInfo.GlobalState) + return nil, false, nil } if err := v.updateBlockValidatorModuleRoot(ctx); err != nil { return nil, false, fmt.Errorf("error updating block validator module root: %w", err) @@ -353,21 +354,8 @@ func (v *L1Validator) generateNodeAction(ctx context.Context, stakerInfo *OurSta wrongNodesExist = true continue } - afterGs := nd.AfterState().GlobalState - requiredBatches := nd.AfterState().RequiredBatches() - if localBatchCount < requiredBatches { - return nil, false, fmt.Errorf("waiting for validator to catch up to assertion batches: %v/%v", localBatchCount, requiredBatches) - } - if requiredBatches > 0 { - haveAcc, err := 
v.inboxTracker.GetBatchAcc(requiredBatches - 1) - if err != nil { - return nil, false, fmt.Errorf("%w: error getting batch %v accumulator: localBatchCount: %d", err, requiredBatches-1, localBatchCount) - } - if haveAcc != nd.AfterInboxBatchAcc { - return nil, false, fmt.Errorf("missed sequencer batches reorg: at seq num %v have acc %v but assertion has acc %v", requiredBatches-1, haveAcc, nd.AfterInboxBatchAcc) - } - } - caughtUp, nodeMsgCount, err := GlobalStateToMsgCount(v.inboxTracker, v.txStreamer, startState.GlobalState) + afterGS := nd.AfterState().GlobalState + caughtUp, nodeMsgCount, err := GlobalStateToMsgCount(v.inboxTracker, v.txStreamer, afterGS) if errors.Is(err, ErrGlobalStateNotInChain) { wrongNodesExist = true log.Error("Found incorrect assertion", "err", err) @@ -377,7 +365,7 @@ func (v *L1Validator) generateNodeAction(ctx context.Context, stakerInfo *OurSta return nil, false, fmt.Errorf("error getting block number from global state: %w", err) } if !caughtUp { - return nil, false, fmt.Errorf("waiting for validator to catch up to assertion blocks. Current: %d target: %v", validatedCount, startState.GlobalState) + return nil, false, fmt.Errorf("waiting for node to catch up to assertion blocks. Current: %d target: %v", validatedCount, afterGS) } if validatedCount < nodeMsgCount { return nil, false, fmt.Errorf("waiting for validator to catch up to assertion blocks. %d / %d", validatedCount, nodeMsgCount) @@ -386,7 +374,7 @@ func (v *L1Validator) generateNodeAction(ctx context.Context, stakerInfo *OurSta "found correct assertion", "node", nd.NodeNum, "count", validatedCount, - "blockHash", afterGs.BlockHash, + "blockHash", afterGS.BlockHash, ) correctNode = existingNodeAction{ number: nd.NodeNum, @@ -404,7 +392,7 @@ func (v *L1Validator) generateNodeAction(ctx context.Context, stakerInfo *OurSta if len(successorNodes) > 0 { lastNodeHashIfExists = &successorNodes[len(successorNodes)-1].NodeHash } - action, err := v.createNewNodeAction(ctx, stakerInfo, localBatchCount, prevInboxMaxCount, startCount, startState, validatedCount, validatedGlobalState, lastNodeHashIfExists) + action, err := v.createNewNodeAction(ctx, stakerInfo, prevInboxMaxCount, startCount, startState, validatedCount, validatedGlobalState, lastNodeHashIfExists) if err != nil { return nil, wrongNodesExist, fmt.Errorf("error generating create new node action (from pos %d to %d): %w", startCount, validatedCount, err) } @@ -417,7 +405,6 @@ func (v *L1Validator) generateNodeAction(ctx context.Context, stakerInfo *OurSta func (v *L1Validator) createNewNodeAction( ctx context.Context, stakerInfo *OurStakerInfo, - localBatchCount uint64, prevInboxMaxCount *big.Int, startCount arbutil.MessageIndex, startState *validator.ExecutionState, @@ -428,23 +415,13 @@ func (v *L1Validator) createNewNodeAction( if !prevInboxMaxCount.IsUint64() { return nil, fmt.Errorf("inbox max count %v isn't a uint64", prevInboxMaxCount) } - minBatchCount := prevInboxMaxCount.Uint64() - if localBatchCount < minBatchCount { - // not enough batches in database - return nil, nil - } - - if localBatchCount == 0 { - // we haven't validated anything - return nil, nil - } if validatedCount < startCount { // we haven't validated any new blocks return nil, nil } - if validatedGS.Batch < minBatchCount { + if validatedGS.Batch < prevInboxMaxCount.Uint64() { // didn't validate enough batches - return nil, nil + return nil, fmt.Errorf("waiting for validator to validate enough batches %d/%d", validatedGS.Batch, prevInboxMaxCount) } batchValidated := 
validatedGS.Batch if validatedGS.PosInBatch == 0 { From ad5dce9218d4a21a1aa20a283d61280d275a97cb Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Tue, 21 Feb 2023 15:43:25 -0700 Subject: [PATCH 11/63] validator node in system tests --- staker/block_validator.go | 5 +---- system_tests/block_validator_test.go | 4 +--- system_tests/common_test.go | 28 ++++++++++++++++------------ validator/valnode/valnode.go | 1 + 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/staker/block_validator.go b/staker/block_validator.go index 556d0530f1..276f310cfb 100644 --- a/staker/block_validator.go +++ b/staker/block_validator.go @@ -117,7 +117,7 @@ var DefaultBlockValidatorConfig = BlockValidatorConfig{ var TestBlockValidatorConfig = BlockValidatorConfig{ Enable: false, - URL: "ws://localhost/", + URL: "", JWTSecret: "", ValidationPoll: 100 * time.Millisecond, ForwardBlocks: 128, @@ -908,9 +908,6 @@ func (v *BlockValidator) WaitForPos(t *testing.T, ctx context.Context, pos arbut case <-timer.C: lastLoop = true case <-trigerchan: - if pos+1 >= v.validated() { - return true - } case <-ctx.Done(): lastLoop = true } diff --git a/system_tests/block_validator_test.go b/system_tests/block_validator_test.go index 5a02c5873d..2b50031824 100644 --- a/system_tests/block_validator_test.go +++ b/system_tests/block_validator_test.go @@ -36,11 +36,9 @@ func testBlockValidatorSimple(t *testing.T, dasModeString string, simpletxloops validatorConfig := arbnode.ConfigDefaultL1NonSequencerTest() validatorConfig.BlockValidator.Enable = true - // TODO - // validatorConfig.BlockValidator.ArbitratorValidator = arbitrator - // validatorConfig.BlockValidator.JitValidator = !arbitrator validatorConfig.DataAvailability = l1NodeConfigA.DataAvailability validatorConfig.DataAvailability.AggregatorConfig.Enable = false + AddDefaultValNode(t, ctx, validatorConfig, !arbitrator) l2clientB, nodeB := Create2ndNodeWithConfig(t, ctx, nodeA, l1stack, l1info, &l2info.ArbInitData, validatorConfig) defer nodeB.StopAndWait() l2info.GenerateAccount("User2") diff --git a/system_tests/common_test.go b/system_tests/common_test.go index 4e5ae6d3a0..f3f3d0d3de 100644 --- a/system_tests/common_test.go +++ b/system_tests/common_test.go @@ -259,6 +259,19 @@ func configByValidationNode(t *testing.T, clientConfig *arbnode.Config, valStack clientConfig.BlockValidator.JWTSecret = "" } +func AddDefaultValNode(t *testing.T, ctx context.Context, nodeConfig *arbnode.Config, useJit bool) { + if !nodeConfig.ValidatorRequired() { + return + } + if nodeConfig.BlockValidator.URL != "" { + return + } + conf := valnode.TestValidationConfig + conf.UseJit = useJit + _, valStack := createTestValidationNode(t, ctx, &conf) + configByValidationNode(t, nodeConfig, valStack) +} + func createTestL1BlockChainWithConfig(t *testing.T, l1info info, stackConfig *node.Config) (info, *ethclient.Client, *eth.Ethereum, *node.Node) { if l1info == nil { l1info = NewL1TestInfo(t) @@ -438,10 +451,7 @@ func createTestNodeOnL1WithConfigImpl( nodeConfig.DelayedSequencer.Enable = false } - if nodeConfig.ValidatorRequired() { - _, valStack := createTestValidationNode(t, ctx, &valnode.TestValidationConfig) - configByValidationNode(t, nodeConfig, valStack) - } + AddDefaultValNode(t, ctx, nodeConfig, true) var err error currentNode, err = arbnode.CreateNode( @@ -470,10 +480,7 @@ func CreateTestL2WithConfig( ) (*BlockchainTestInfo, *arbnode.Node, *ethclient.Client) { feedErrChan := make(chan error, 10) - if nodeConfig.ValidatorRequired() { - _, valStack := createTestValidationNode(t, 
ctx, &valnode.TestValidationConfig) - configByValidationNode(t, nodeConfig, valStack) - } + AddDefaultValNode(t, ctx, nodeConfig, true) l2info, stack, chainDb, arbDb, blockchain := createL2BlockChain(t, l2Info, "", params.ArbitrumDevTestChainConfig()) currentNode, err := arbnode.CreateNode(ctx, stack, chainDb, arbDb, nodeConfig, blockchain, nil, nil, nil, nil, feedErrChan) @@ -577,10 +584,7 @@ func Create2ndNodeWithConfig( l2blockchain, err := execution.WriteOrTestBlockChain(l2chainDb, nil, initReader, first.Execution.ArbInterface.BlockChain().Config(), arbnode.ConfigDefaultL2Test().TxLookupLimit, 0) Require(t, err) - if nodeConfig.ValidatorRequired() { - _, valStack := createTestValidationNode(t, ctx, &valnode.TestValidationConfig) - configByValidationNode(t, nodeConfig, valStack) - } + AddDefaultValNode(t, ctx, nodeConfig, true) currentNode, err := arbnode.CreateNode(ctx, l2stack, l2chainDb, l2arbDb, nodeConfig, l2blockchain, l1client, first.DeployInfo, &txOpts, dataSigner, feedErrChan) Require(t, err) diff --git a/validator/valnode/valnode.go b/validator/valnode/valnode.go index 12d12b0574..7155cb0ada 100644 --- a/validator/valnode/valnode.go +++ b/validator/valnode/valnode.go @@ -47,6 +47,7 @@ var DefaultValidationConfig = Config{ } var TestValidationConfig = Config{ + UseJit: true, Jit: server_jit.DefaultJitSpawnerConfig, ApiAuth: false, ApiPublic: true, From d7776fee1e91b718a6024c00a364d803b0594be0 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Tue, 21 Feb 2023 15:43:50 -0700 Subject: [PATCH 12/63] blockvalidator tests: multiple txs in batch --- system_tests/block_validator_test.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/system_tests/block_validator_test.go b/system_tests/block_validator_test.go index 2b50031824..e3ab350ecf 100644 --- a/system_tests/block_validator_test.go +++ b/system_tests/block_validator_test.go @@ -28,6 +28,12 @@ func testBlockValidatorSimple(t *testing.T, dasModeString string, simpletxloops chainConfig, l1NodeConfigA, lifecycleManager, _, dasSignerKey := setupConfigWithDAS(t, ctx, dasModeString) defer lifecycleManager.StopAndWaitUntil(time.Second) + var delayEvery int + if simpletxloops > 1 { + l1NodeConfigA.BatchPoster.MaxBatchPostInterval = time.Millisecond * 500 + delayEvery = simpletxloops / 3 + } + l2info, nodeA, l2client, l1info, _, l1client, l1stack := createTestNodeOnL1WithConfig(t, ctx, true, l1NodeConfigA, chainConfig, nil) defer requireClose(t, l1stack) defer nodeA.StopAndWait() @@ -76,7 +82,9 @@ func testBlockValidatorSimple(t *testing.T, dasModeString string, simpletxloops _, err = WaitForTx(ctx, l2client, tx.Hash(), time.Second*5) Require(t, err) } - + if delayEvery > 0 && i%delayEvery == (delayEvery-1) { + <-time.After(time.Second) + } } delayedTx := l2info.PrepareTx("Owner", "User2", 30002, perTransfer, nil) From d1a60e5583275a52b53fcbce5e9e8494efc05698 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Tue, 21 Feb 2023 19:24:28 -0700 Subject: [PATCH 13/63] validation fixes --- staker/block_validator.go | 6 +++--- staker/stateless_block_validator.go | 12 ++++++++++-- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/staker/block_validator.go b/staker/block_validator.go index 276f310cfb..3b2f5a4d42 100644 --- a/staker/block_validator.go +++ b/staker/block_validator.go @@ -464,10 +464,10 @@ func (v *BlockValidator) createNextValidationEntry(ctx context.Context) (bool, e BlockHash: endRes.BlockHash, SendRoot: endRes.SendRoot, } - if pos < v.nextCreateBatchMsgCount { + if pos+1 < 
v.nextCreateBatchMsgCount { endGS.Batch = v.nextCreateStartGS.Batch endGS.PosInBatch = v.nextCreateStartGS.PosInBatch + 1 - } else if pos == v.nextCreateBatchMsgCount { + } else if pos+1 == v.nextCreateBatchMsgCount { endGS.Batch = v.nextCreateStartGS.Batch + 1 endGS.PosInBatch = 0 } else { @@ -898,7 +898,7 @@ func (v *BlockValidator) WaitForPos(t *testing.T, ctx context.Context, pos arbut defer timer.Stop() lastLoop := false for { - if pos >= v.validated() { + if v.validated() > pos { return true } if lastLoop { diff --git a/staker/stateless_block_validator.go b/staker/stateless_block_validator.go index ae08b207c2..bf40cf2f22 100644 --- a/staker/stateless_block_validator.go +++ b/staker/stateless_block_validator.go @@ -194,13 +194,21 @@ func newValidationEntry( Number: start.Batch, Data: batch, } + hasDelayed := false + var delayedNum uint64 + if msg.DelayedMessagesRead == prevDelayed+1 { + hasDelayed = true + delayedNum = prevDelayed + } else if msg.DelayedMessagesRead != prevDelayed { + return nil, fmt.Errorf("illegal validation entry delayedMessage %d, previous %d", msg.DelayedMessagesRead, prevDelayed) + } return &validationEntry{ Stage: ReadyForRecord, Pos: pos, Start: start, End: end, - HasDelayedMsg: (msg.DelayedMessagesRead > prevDelayed), - DelayedMsgNr: msg.DelayedMessagesRead, + HasDelayedMsg: hasDelayed, + DelayedMsgNr: delayedNum, msg: msg, BatchInfo: []validator.BatchInfo{batchInfo}, }, nil From 796cb6aa474191b2f9c9b5effc64f6ea47001b11 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Tue, 7 Mar 2023 00:11:20 +0200 Subject: [PATCH 14/63] fix challenges FindGlobalStateFromMessageCount --- staker/block_challenge_backend.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/staker/block_challenge_backend.go b/staker/block_challenge_backend.go index 303ff50d8c..8d32b24213 100644 --- a/staker/block_challenge_backend.go +++ b/staker/block_challenge_backend.go @@ -72,7 +72,7 @@ func NewBlockChallengeBackend( }, nil } -func (b *BlockChallengeBackend) findBatchFromMessageCount(msgCount arbutil.MessageIndex) (uint64, error) { +func (b *BlockChallengeBackend) findBatchAfterMessageCount(msgCount arbutil.MessageIndex) (uint64, error) { if msgCount == 0 { return 0, nil } @@ -94,7 +94,7 @@ func (b *BlockChallengeBackend) findBatchFromMessageCount(msgCount arbutil.Messa if batchMsgCount < msgCount { low = mid + 1 } else if batchMsgCount == msgCount { - return mid, nil + return mid + 1, nil } else if mid == low { // batchMsgCount > msgCount return mid, nil } else { // batchMsgCount > msgCount @@ -104,7 +104,7 @@ func (b *BlockChallengeBackend) findBatchFromMessageCount(msgCount arbutil.Messa } func (b *BlockChallengeBackend) FindGlobalStateFromMessageCount(count arbutil.MessageIndex) (validator.GoGlobalState, error) { - batch, err := b.findBatchFromMessageCount(count) + batch, err := b.findBatchAfterMessageCount(count) if err != nil { return validator.GoGlobalState{}, err } From be93c1c46e9b4da74ad03956506a95b719a8e607 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Mon, 6 Mar 2023 12:39:18 +0200 Subject: [PATCH 15/63] stateless_block_validator: use recorder interface --- staker/stateless_block_validator.go | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/staker/stateless_block_validator.go b/staker/stateless_block_validator.go index bf40cf2f22..2976b6b17e 100644 --- a/staker/stateless_block_validator.go +++ b/staker/stateless_block_validator.go @@ -7,6 +7,7 @@ import ( "context" "fmt" "sync" + "testing" 
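The new findBatchAfterMessageCount is a binary search for the first batch whose cumulative message count is strictly greater than the requested count, so a count that lands exactly on a batch boundary now maps to position 0 of the following batch, which is what FindGlobalStateFromMessageCount needs. A minimal, self-contained sketch of the same search, with a plain slice of cumulative counts standing in for the inbox tracker and out-of-range error handling omitted:

    package main

    import (
        "fmt"
        "sort"
    )

    // findBatchAfterMessageCount returns the index of the first batch whose
    // cumulative message count is strictly greater than msgCount, i.e. the
    // batch in which the global state "msgCount messages processed" lives.
    // batchMsgCounts[i] is the total number of messages up to and including
    // batch i, so the slice must be strictly increasing (no empty batches).
    func findBatchAfterMessageCount(batchMsgCounts []uint64, msgCount uint64) uint64 {
        if msgCount == 0 {
            return 0
        }
        // sort.Search returns the smallest index for which the predicate holds.
        idx := sort.Search(len(batchMsgCounts), func(i int) bool {
            return batchMsgCounts[i] > msgCount
        })
        return uint64(idx)
    }

    func main() {
        // Three batches holding 5 messages each (cumulative counts 5, 10, 15).
        counts := []uint64{5, 10, 15}
        fmt.Println(findBatchAfterMessageCount(counts, 3))  // 0: inside batch 0
        fmt.Println(findBatchAfterMessageCount(counts, 5))  // 1: boundary maps to start of batch 1
        fmt.Println(findBatchAfterMessageCount(counts, 12)) // 2
    }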
"github.com/offchainlabs/nitro/arbnode/execution" "github.com/offchainlabs/nitro/util/signature" @@ -30,7 +31,7 @@ type StatelessBlockValidator struct { execSpawner validator.ExecutionSpawner validationSpawners []validator.ValidationSpawner - recorder *execution.BlockRecorder + recorder BlockRecorder inboxReader InboxReaderInterface inboxTracker InboxTrackerInterface @@ -47,6 +48,16 @@ type BlockValidatorRegistrer interface { SetBlockValidator(*BlockValidator) } +type BlockRecorder interface { + RecordBlockCreation( + ctx context.Context, + pos arbutil.MessageIndex, + msg *arbostypes.MessageWithMetadata, + ) (*execution.RecordResult, error) + MarkValid(pos arbutil.MessageIndex, resultHash common.Hash) + PrepareForRecord(ctx context.Context, start, end arbutil.MessageIndex) error +} + type InboxTrackerInterface interface { BlockValidatorRegistrer GetDelayedMessageBytes(uint64) ([]byte, error) @@ -218,7 +229,7 @@ func NewStatelessBlockValidator( inboxReader InboxReaderInterface, inbox InboxTrackerInterface, streamer TransactionStreamerInterface, - recorder *execution.BlockRecorder, + recorder BlockRecorder, arbdb ethdb.Database, das arbstate.DataAvailabilityReader, config *BlockValidatorConfig, @@ -423,6 +434,10 @@ func (v *StatelessBlockValidator) ValidateResult( return true, &entry.End, nil } +func (v *StatelessBlockValidator) OverrideRecorder(t *testing.T, recorder BlockRecorder) { + v.recorder = recorder +} + func (v *StatelessBlockValidator) Start(ctx_in context.Context) error { err := v.execSpawner.Start(ctx_in) if err != nil { From 31ffc5ca97e843cc855edb202cc1cc3112318dca Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Mon, 6 Mar 2023 12:51:10 +0200 Subject: [PATCH 16/63] validation_mock improvements create mock recorder, store execution run requests --- system_tests/validation_mock_test.go | 71 +++++++++++++++++++++------- 1 file changed, 53 insertions(+), 18 deletions(-) diff --git a/system_tests/validation_mock_test.go b/system_tests/validation_mock_test.go index 7cd7f5f979..98db34c0eb 100644 --- a/system_tests/validation_mock_test.go +++ b/system_tests/validation_mock_test.go @@ -3,7 +3,6 @@ package arbtest import ( "bytes" "context" - "errors" "math/big" "testing" "time" @@ -12,6 +11,11 @@ import ( "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/node" "github.com/ethereum/go-ethereum/rpc" + "github.com/offchainlabs/nitro/arbnode" + "github.com/offchainlabs/nitro/arbnode/execution" + "github.com/offchainlabs/nitro/arbos/arbostypes" + "github.com/offchainlabs/nitro/arbutil" + "github.com/offchainlabs/nitro/staker" "github.com/offchainlabs/nitro/util/containers" "github.com/offchainlabs/nitro/validator" "github.com/offchainlabs/nitro/validator/server_api" @@ -19,6 +23,7 @@ import ( ) type mockSpawner struct { + ExecSpawned []uint64 } var blockHashKey = common.HexToHash("0x11223344") @@ -49,11 +54,7 @@ func (s *mockSpawner) Launch(entry *validator.ValidationInput, moduleRoot common Promise: containers.NewPromise[validator.GoGlobalState](), root: moduleRoot, } - if moduleRoot != mockWasmModuleRoot { - run.ProduceError(errors.New("unsupported root")) - } else { - run.Produce(globalstateFromTestPreimages(entry.Preimages)) - } + run.Produce(globalstateFromTestPreimages(entry.Preimages)) return run } @@ -66,19 +67,13 @@ func (s *mockSpawner) Room() int { return 4 } func (s *mockSpawner) LatestWasmModuleRoot() (common.Hash, error) { return mockWasmModuleRoot, nil } func (s *mockSpawner) CreateExecutionRun(wasmModuleRoot common.Hash, input 
*validator.ValidationInput) (validator.ExecutionRun, error) { - if wasmModuleRoot != mockWasmModuleRoot { - return nil, errors.New("unsupported root") - } + s.ExecSpawned = append(s.ExecSpawned, input.Id) return &mockExecRun{ startState: input.StartState, endState: globalstateFromTestPreimages(input.Preimages), }, nil } -func (s *mockSpawner) LamockWasmModuleRoot() (common.Hash, error) { - return common.HexToHash("0x1234567890abcdeffedcba0987654321"), nil -} - func (s *mockSpawner) WriteToFile(input *validator.ValidationInput, expOut validator.GoGlobalState, moduleRoot common.Hash) error { return nil } @@ -147,7 +142,7 @@ type mockMachineStep struct { func (s *mockMachineStep) Close() {} -func createMockValidationNode(t *testing.T, ctx context.Context, config *server_arb.ArbitratorSpawnerConfig) *node.Node { +func createMockValidationNode(t *testing.T, ctx context.Context, config *server_arb.ArbitratorSpawnerConfig) (*mockSpawner, *node.Node) { stackConf := node.DefaultConfig stackConf.HTTPPort = 0 stackConf.DataDir = "" @@ -187,7 +182,7 @@ func createMockValidationNode(t *testing.T, ctx context.Context, config *server_ serverAPI.StopOnly() }() - return stack + return spawner, stack } // mostly tests translation to/from json and running over network @@ -195,7 +190,7 @@ func TestValidationServerAPI(t *testing.T) { t.Parallel() ctx, cancel := context.WithCancel(context.Background()) defer cancel() - validationDefault := createMockValidationNode(t, ctx, nil) + _, validationDefault := createMockValidationNode(t, ctx, nil) client := server_api.NewExecutionClient(validationDefault.WSEndpoint(), nil) err := client.Start(ctx) Require(t, err) @@ -259,10 +254,10 @@ func TestExecutionKeepAlive(t *testing.T) { t.Parallel() ctx, cancel := context.WithCancel(context.Background()) defer cancel() - validationDefault := createMockValidationNode(t, ctx, nil) + _, validationDefault := createMockValidationNode(t, ctx, nil) shortTimeoutConfig := server_arb.DefaultArbitratorSpawnerConfig shortTimeoutConfig.ExecRunTimeout = time.Second - validationShortTO := createMockValidationNode(t, ctx, &shortTimeoutConfig) + _, validationShortTO := createMockValidationNode(t, ctx, &shortTimeoutConfig) clientDefault := server_api.NewExecutionClient(validationDefault.WSEndpoint(), nil) err := clientDefault.Start(ctx) @@ -290,3 +285,43 @@ func TestExecutionKeepAlive(t *testing.T) { t.Error("getStep should have timed out but didn't") } } + +type mockBlockRecorder struct { + validator *staker.StatelessBlockValidator + streamer *arbnode.TransactionStreamer +} + +func (m *mockBlockRecorder) RecordBlockCreation( + ctx context.Context, + pos arbutil.MessageIndex, + msg *arbostypes.MessageWithMetadata, +) (*execution.RecordResult, error) { + _, globalpos, err := m.validator.GlobalStatePositionsAtCount(pos + 1) + if err != nil { + return nil, err + } + res, err := m.streamer.ResultAtCount(pos + 1) + if err != nil { + return nil, err + } + globalState := validator.GoGlobalState{ + Batch: globalpos.BatchNumber, + PosInBatch: globalpos.PosInBatch, + BlockHash: res.BlockHash, + SendRoot: res.SendRoot, + } + return &execution.RecordResult{ + Pos: pos, + BlockHash: res.BlockHash, + Preimages: globalstateToTestPreimages(globalState), + }, nil +} + +func (m *mockBlockRecorder) MarkValid(pos arbutil.MessageIndex, resultHash common.Hash) {} +func (m *mockBlockRecorder) PrepareForRecord(ctx context.Context, start, end arbutil.MessageIndex) error { + return nil +} + +func newMockRecorder(validator *staker.StatelessBlockValidator, streamer 
*arbnode.TransactionStreamer) *mockBlockRecorder { + return &mockBlockRecorder{validator, streamer} +} From 30eac5472933f03e47956fa3ab76082fa286abb6 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Mon, 6 Mar 2023 12:52:12 +0200 Subject: [PATCH 17/63] full challenge test: add mocks for various pos-in-batch --- system_tests/full_challenge_impl_test.go | 92 ++++++++++++++++++++---- system_tests/full_challenge_test.go | 4 +- 2 files changed, 79 insertions(+), 17 deletions(-) diff --git a/system_tests/full_challenge_impl_test.go b/system_tests/full_challenge_impl_test.go index 4aa83f8081..24795f6237 100644 --- a/system_tests/full_challenge_impl_test.go +++ b/system_tests/full_challenge_impl_test.go @@ -18,6 +18,7 @@ import ( "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/ethclient" "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/node" "github.com/ethereum/go-ethereum/params" "github.com/ethereum/go-ethereum/rlp" @@ -137,13 +138,15 @@ func writeTxToBatch(writer io.Writer, tx *types.Transaction) error { return err } -func makeBatch(t *testing.T, l2Node *arbnode.Node, l2Info *BlockchainTestInfo, backend *ethclient.Client, sequencer *bind.TransactOpts, seqInbox *mocksgen.SequencerInboxStub, seqInboxAddr common.Address, isChallenger bool) { +const MsgPerBatch = int64(5) + +func makeBatch(t *testing.T, l2Node *arbnode.Node, l2Info *BlockchainTestInfo, backend *ethclient.Client, sequencer *bind.TransactOpts, seqInbox *mocksgen.SequencerInboxStub, seqInboxAddr common.Address, modStep int64) { ctx := context.Background() batchBuffer := bytes.NewBuffer([]byte{}) - for i := int64(0); i < 10; i++ { + for i := int64(0); i < MsgPerBatch; i++ { value := i - if i == 5 && isChallenger { + if i == modStep { value++ } err := writeTxToBatch(batchBuffer, l2Info.PrepareTx("Owner", "Destination", 1000000, big.NewInt(value), []byte{})) @@ -153,9 +156,9 @@ func makeBatch(t *testing.T, l2Node *arbnode.Node, l2Info *BlockchainTestInfo, b Require(t, err) message := append([]byte{0}, compressed...) 
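makeBatch now takes modStep instead of the old isChallenger flag: each batch carries MsgPerBatch transactions, and when modStep falls inside [0, MsgPerBatch) exactly that transaction's transfer value is bumped, so the challenger chain first diverges at a chosen message, while -1 leaves a batch honest. A small illustrative helper (not part of the patch) showing how a 1-based challengeMsgIdx translates into the per-batch modStep values used by the three makeBatch calls further down in this patch:

    package main

    import "fmt"

    const msgPerBatch = int64(5) // mirrors MsgPerBatch in the test

    // modStepForBatch converts a 1-based global message index into the modStep
    // argument for a given 0-based batch. Any value outside [0, msgPerBatch)
    // leaves that batch untouched, so exactly one batch ends up modified.
    func modStepForBatch(challengeMsgIdx, batch int64) int64 {
        return challengeMsgIdx - msgPerBatch*batch - 1
    }

    func main() {
        challengeMsgIdx := int64(7)
        for batch := int64(0); batch < 3; batch++ {
            fmt.Printf("batch %d: modStep %d\n", batch, modStepForBatch(challengeMsgIdx, batch))
        }
        // Prints modStep 6, 1, -4: only batch 1 gets a modified transaction
        // (its second one); batches 0 and 2 stay identical on both chains.
    }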
- maxUint256 := new(big.Int).Lsh(common.Big1, 256) - maxUint256.Sub(maxUint256, common.Big1) - tx, err := seqInbox.AddSequencerL2BatchFromOrigin0(sequencer, maxUint256, message, big.NewInt(1), common.Address{}, big.NewInt(0), big.NewInt(0)) + seqNum := new(big.Int).Lsh(common.Big1, 256) + seqNum.Sub(seqNum, common.Big1) + tx, err := seqInbox.AddSequencerL2BatchFromOrigin0(sequencer, seqNum, message, big.NewInt(1), common.Address{}, big.NewInt(0), big.NewInt(0)) Require(t, err) receipt, err := EnsureTxSucceeded(ctx, backend, tx) Require(t, err) @@ -218,8 +221,7 @@ func setupSequencerInboxStub(ctx context.Context, t *testing.T, l1Info *Blockcha return bridgeAddr, seqInbox, seqInboxAddr } -func RunChallengeTest(t *testing.T, asserterIsCorrect bool) { - t.Parallel() +func RunChallengeTest(t *testing.T, asserterIsCorrect bool, useStubs bool, challengeMsgIdx int64) { glogger := log.NewGlogHandler(log.StreamHandler(os.Stderr, log.TerminalFormat(false))) glogger.Verbosity(log.LvlInfo) log.Root().SetHandler(glogger) @@ -241,7 +243,13 @@ func RunChallengeTest(t *testing.T, asserterIsCorrect bool) { conf.BatchPoster.Enable = false conf.InboxReader.CheckDelay = time.Second - _, valStack := createTestValidationNode(t, ctx, &valnode.TestValidationConfig) + var valStack *node.Node + var mockSpawn *mockSpawner + if useStubs { + mockSpawn, valStack = createMockValidationNode(t, ctx, &valnode.TestValidationConfig.Arbitrator) + } else { + _, valStack = createTestValidationNode(t, ctx, &valnode.TestValidationConfig) + } configByValidationNode(t, conf, valStack) fatalErrChan := make(chan error, 10) @@ -274,8 +282,22 @@ func RunChallengeTest(t *testing.T, asserterIsCorrect bool) { asserterL2Info.GenerateAccount("Destination") challengerL2Info.SetFullAccountInfo("Destination", asserterL2Info.GetInfoWithPrivKey("Destination")) - makeBatch(t, asserterL2, asserterL2Info, l1Backend, &sequencerTxOpts, asserterSeqInbox, asserterSeqInboxAddr, false) - makeBatch(t, challengerL2, challengerL2Info, l1Backend, &sequencerTxOpts, challengerSeqInbox, challengerSeqInboxAddr, true) + + if challengeMsgIdx < 1 || challengeMsgIdx > 3*MsgPerBatch { + Fail(t, "challengeMsgIdx illegal") + } + + // seqNum := common.Big2 + makeBatch(t, asserterL2, asserterL2Info, l1Backend, &sequencerTxOpts, asserterSeqInbox, asserterSeqInboxAddr, -1) + makeBatch(t, challengerL2, challengerL2Info, l1Backend, &sequencerTxOpts, challengerSeqInbox, challengerSeqInboxAddr, challengeMsgIdx-1) + + // seqNum.Add(seqNum, common.Big1) + makeBatch(t, asserterL2, asserterL2Info, l1Backend, &sequencerTxOpts, asserterSeqInbox, asserterSeqInboxAddr, -1) + makeBatch(t, challengerL2, challengerL2Info, l1Backend, &sequencerTxOpts, challengerSeqInbox, challengerSeqInboxAddr, challengeMsgIdx-MsgPerBatch-1) + + // seqNum.Add(seqNum, common.Big1) + makeBatch(t, asserterL2, asserterL2Info, l1Backend, &sequencerTxOpts, asserterSeqInbox, asserterSeqInboxAddr, -1) + makeBatch(t, challengerL2, challengerL2Info, l1Backend, &sequencerTxOpts, challengerSeqInbox, challengerSeqInboxAddr, challengeMsgIdx-MsgPerBatch*2-1) trueSeqInboxAddr := challengerSeqInboxAddr trueDelayedBridge := challengerBridgeAddr @@ -291,9 +313,14 @@ func RunChallengeTest(t *testing.T, asserterIsCorrect bool) { if err != nil { Fail(t, err) } - wasmModuleRoot := locator.LatestWasmModuleRoot() - if (wasmModuleRoot == common.Hash{}) { - Fail(t, "latest machine not found") + var wasmModuleRoot common.Hash + if useStubs { + wasmModuleRoot = mockWasmModuleRoot + } else { + wasmModuleRoot = 
locator.LatestWasmModuleRoot() + if (wasmModuleRoot == common.Hash{}) { + Fail(t, "latest machine not found") + } } asserterGenesis := asserterL2.Execution.ArbInterface.BlockChain().Genesis() @@ -314,7 +341,7 @@ func RunChallengeTest(t *testing.T, asserterIsCorrect bool) { } asserterEndGlobalState := validator.GoGlobalState{ BlockHash: asserterLatestBlock.Hash(), - Batch: 2, + Batch: 4, PosInBatch: 0, } numBlocks := asserterLatestBlock.NumberU64() - asserterGenesis.NumberU64() @@ -341,6 +368,10 @@ func RunChallengeTest(t *testing.T, asserterIsCorrect bool) { if err != nil { Fail(t, err) } + if useStubs { + asserterRecorder := newMockRecorder(asserterValidator, asserterL2.TxStreamer) + asserterValidator.OverrideRecorder(t, asserterRecorder) + } err = asserterValidator.Start(ctx) if err != nil { Fail(t, err) @@ -354,6 +385,10 @@ func RunChallengeTest(t *testing.T, asserterIsCorrect bool) { if err != nil { Fail(t, err) } + if useStubs { + challengerRecorder := newMockRecorder(challengerValidator, challengerL2.TxStreamer) + challengerValidator.OverrideRecorder(t, challengerRecorder) + } err = challengerValidator.Start(ctx) if err != nil { Fail(t, err) @@ -394,6 +429,19 @@ func RunChallengeTest(t *testing.T, asserterIsCorrect bool) { if tx == nil { Fail(t, "no move") } + + if useStubs { + if len(mockSpawn.ExecSpawned) != 0 { + if len(mockSpawn.ExecSpawned) != 1 { + Fail(t, "bad number of spawned execRuns: ", len(mockSpawn.ExecSpawned)) + } + if mockSpawn.ExecSpawned[0] != uint64(challengeMsgIdx) { + Fail(t, "wrong spawned execRuns: ", mockSpawn.ExecSpawned[0], " expected: ", challengeMsgIdx) + } + return + } + } + _, err = EnsureTxSucceeded(ctx, l1Backend, tx) if err != nil { if !currentCorrect && strings.Contains(err.Error(), "BAD_SEQINBOX_MESSAGE") { @@ -419,3 +467,17 @@ func RunChallengeTest(t *testing.T, asserterIsCorrect bool) { Fail(t, "challenge timed out without winner") } + +func TestChallengeManagerMockAsserterIncorrect(t *testing.T) { + t.Parallel() + for i := int64(1); i <= MsgPerBatch*3; i++ { + RunChallengeTest(t, false, true, i) + } +} + +func TestChallengeManagerMockAsserterCorrect(t *testing.T) { + t.Parallel() + for i := int64(1); i <= MsgPerBatch*3; i++ { + RunChallengeTest(t, true, true, i) + } +} diff --git a/system_tests/full_challenge_test.go b/system_tests/full_challenge_test.go index 367ac33464..5698062998 100644 --- a/system_tests/full_challenge_test.go +++ b/system_tests/full_challenge_test.go @@ -15,9 +15,9 @@ import ( ) func TestChallengeManagerFullAsserterIncorrect(t *testing.T) { - RunChallengeTest(t, false) + RunChallengeTest(t, false, false, MsgPerBatch+1) } func TestChallengeManagerFullAsserterCorrect(t *testing.T) { - RunChallengeTest(t, true) + RunChallengeTest(t, true, false, MsgPerBatch+2) } From 69a3f6633b3de0ccdd7b5b17f9b03c0a7aad981c Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Sat, 25 Mar 2023 08:10:11 -0600 Subject: [PATCH 18/63] testChallenge: fix parallelism --- system_tests/full_challenge_impl_test.go | 4 ++-- system_tests/full_challenge_test.go | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/system_tests/full_challenge_impl_test.go b/system_tests/full_challenge_impl_test.go index 24795f6237..9acbd2a1b5 100644 --- a/system_tests/full_challenge_impl_test.go +++ b/system_tests/full_challenge_impl_test.go @@ -468,14 +468,14 @@ func RunChallengeTest(t *testing.T, asserterIsCorrect bool, useStubs bool, chall Fail(t, "challenge timed out without winner") } -func TestChallengeManagerMockAsserterIncorrect(t *testing.T) { +func 
TestMockChallengeManagerAsserterIncorrect(t *testing.T) { t.Parallel() for i := int64(1); i <= MsgPerBatch*3; i++ { RunChallengeTest(t, false, true, i) } } -func TestChallengeManagerMockAsserterCorrect(t *testing.T) { +func TestMockChallengeManagerAsserterCorrect(t *testing.T) { t.Parallel() for i := int64(1); i <= MsgPerBatch*3; i++ { RunChallengeTest(t, true, true, i) diff --git a/system_tests/full_challenge_test.go b/system_tests/full_challenge_test.go index 5698062998..c297013aba 100644 --- a/system_tests/full_challenge_test.go +++ b/system_tests/full_challenge_test.go @@ -15,9 +15,11 @@ import ( ) func TestChallengeManagerFullAsserterIncorrect(t *testing.T) { + t.Parallel() RunChallengeTest(t, false, false, MsgPerBatch+1) } func TestChallengeManagerFullAsserterCorrect(t *testing.T) { + t.Parallel() RunChallengeTest(t, true, false, MsgPerBatch+2) } From f43c5a1db0602f781a64b60b19feb14acbe5e955 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Sun, 26 Mar 2023 12:53:29 -0600 Subject: [PATCH 19/63] dont run MockChallenge tests with race detection --- system_tests/full_challenge_impl_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/system_tests/full_challenge_impl_test.go b/system_tests/full_challenge_impl_test.go index 9acbd2a1b5..9b5ca0c51b 100644 --- a/system_tests/full_challenge_impl_test.go +++ b/system_tests/full_challenge_impl_test.go @@ -1,6 +1,10 @@ // Copyright 2021-2022, Offchain Labs, Inc. // For license information, see https://github.com/nitro/blob/master/LICENSE +// race detection makes things slow and miss timeouts +//go:build !race +// +build !race + package arbtest import ( From d6cefc752e4d9fcaa589b96a2ac2c25ee7e9022b Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Tue, 18 Apr 2023 19:57:19 -0600 Subject: [PATCH 20/63] merge fixes --- arbnode/sync_monitor.go | 9 ++------- cmd/nitro/init.go | 10 +++++++--- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/arbnode/sync_monitor.go b/arbnode/sync_monitor.go index 0febc9957b..d01c300fa9 100644 --- a/arbnode/sync_monitor.go +++ b/arbnode/sync_monitor.go @@ -70,13 +70,8 @@ func (s *SyncMonitor) SyncProgressMap() map[string]interface{} { syncing = true builtMessageCount = 0 } else { - blockNum, err := s.txStreamer.exec.MessageCountToBlockNumber(builtMessageCount) - if err != nil { - res["blockBuiltErr"] = err - syncing = true - } else { - res["blockNum"] = blockNum - } + blockNum := s.txStreamer.exec.MessageIndexToBlockNumber(builtMessageCount) + res["blockNum"] = blockNum builtMessageCount++ res["messageOfLastBlock"] = builtMessageCount } diff --git a/cmd/nitro/init.go b/cmd/nitro/init.go index a795f2bc70..dd0c6294aa 100644 --- a/cmd/nitro/init.go +++ b/cmd/nitro/init.go @@ -304,19 +304,23 @@ func findImportantRoots(ctx context.Context, chainDb ethdb.Database, stack *node } validatorDb := rawdb.NewTable(arbDb, arbnode.BlockValidatorPrefix) - lastValidated, err := staker.ReadLastValidatedFromDb(validatorDb) + lastValidated, err := staker.ReadLastValidatedInfo(validatorDb) if err != nil { return nil, err } if lastValidated != nil { - lastValidatedHeader := rawdb.ReadHeader(chainDb, lastValidated.BlockHash, lastValidated.BlockNumber) + var lastValidatedHeader *types.Header + headerNum := rawdb.ReadHeaderNumber(chainDb, lastValidated.GlobalState.BlockHash) + if headerNum != nil { + lastValidatedHeader = rawdb.ReadHeader(chainDb, lastValidated.GlobalState.BlockHash, *headerNum) + } if lastValidatedHeader != nil { err = roots.addHeader(lastValidatedHeader, false) if err != nil { return nil, err } } 
else { - log.Warn("missing latest validated block", "number", lastValidated.BlockNumber, "hash", lastValidated.BlockHash) + log.Warn("missing latest validated block", "hash", lastValidated.GlobalState.BlockHash) } } } else if initConfig.Prune == "full" { From 9b5f1845946b5be7b1709f9dfb32b0e8154b10e5 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Mon, 24 Apr 2023 17:41:39 -0600 Subject: [PATCH 21/63] fix block_validator test --- system_tests/block_validator_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/system_tests/block_validator_test.go b/system_tests/block_validator_test.go index a20e7992d4..90b67eacc6 100644 --- a/system_tests/block_validator_test.go +++ b/system_tests/block_validator_test.go @@ -30,7 +30,7 @@ func testBlockValidatorSimple(t *testing.T, dasModeString string, simpletxloops var delayEvery int if simpletxloops > 1 { - l1NodeConfigA.BatchPoster.MaxBatchPostInterval = time.Millisecond * 500 + l1NodeConfigA.BatchPoster.MaxBatchPostDelay = time.Millisecond * 500 delayEvery = simpletxloops / 3 } From 3acf70a46d99b53de470a348ce2cd59a2ab31a4c Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Mon, 24 Apr 2023 18:46:21 -0600 Subject: [PATCH 22/63] block_validator: more detailed trace logs --- staker/block_validator.go | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/staker/block_validator.go b/staker/block_validator.go index fe40698198..6683ebedcf 100644 --- a/staker/block_validator.go +++ b/staker/block_validator.go @@ -449,6 +449,7 @@ func (v *BlockValidator) createNextValidationEntry(ctx context.Context) (bool, e defer v.reorgMutex.RUnlock() pos := v.created() if pos > v.validated()+arbutil.MessageIndex(v.config().ForwardBlocks) { + log.Trace("create validation entry: nothing to do", "pos", pos, "validated", v.validated()) return false, nil } streamerMsgCount, err := v.streamer.GetProcessedMessageCount() @@ -456,6 +457,7 @@ func (v *BlockValidator) createNextValidationEntry(ctx context.Context) (bool, e return false, err } if pos >= streamerMsgCount { + log.Trace("create validation entry: nothing to do", "pos", pos, "streamerMsgCount", streamerMsgCount) return false, nil } msg, err := v.streamer.GetMessage(pos) @@ -502,6 +504,7 @@ func (v *BlockValidator) createNextValidationEntry(ctx context.Context) (bool, e v.nextCreateStartGS = endGS v.nextCreatePrevDelayed = msg.DelayedMessagesRead atomicStorePos(&v.createdA, pos+1) + log.Trace("create validation entry: created", "pos", pos) return true, nil } @@ -563,6 +566,7 @@ func (v *BlockValidator) sendNextRecordRequest(ctx context.Context) (bool, error } pos := v.recordSent() if pos >= v.prepared { + log.Trace("next record request: nothing to send", "pos", pos) return false, nil } validationStatus, found := v.validations.Load(pos) @@ -578,6 +582,7 @@ func (v *BlockValidator) sendNextRecordRequest(ctx context.Context) (bool, error return false, err } atomicStorePos(&v.recordSentA, pos+1) + log.Trace("next record request: sent", "pos", pos) return true, nil } @@ -629,6 +634,7 @@ validatiosLoop: v.reorgMutex.RLock() pos = v.valLoopPos if pos >= v.recordSent() { + log.Trace("advanceValidations: nothing to validate", "pos", pos) return nil, nil } validationStatus, found := v.validations.Load(pos) @@ -648,8 +654,9 @@ validatiosLoop: return &pos, nil } var wasmRoots []common.Hash - for _, run := range validationStatus.Runs { + for i, run := range validationStatus.Runs { if !run.Ready() { + log.Trace("advanceValidations: validation not ready", "pos", pos, "run", i) continue 
validatiosLoop } wasmRoots = append(wasmRoots, run.WasmModuleRoot()) @@ -685,6 +692,7 @@ validatiosLoop: continue } if room == 0 { + log.Trace("advanceValidations: no more room", "pos", pos) return nil, nil } if currentStatus == Prepared { @@ -705,8 +713,9 @@ validatiosLoop: defer validatorPendingValidationsGauge.Dec(1) var runs []validator.ValidationRun for _, moduleRoot := range wasmRoots { - for _, spawner := range v.validationSpawners { + for i, spawner := range v.validationSpawners { run := spawner.Launch(input, moduleRoot) + log.Trace("advanceValidations: launched", "pos", validationStatus.Entry.Pos, "moduleRoot", moduleRoot, "spawner", i) runs = append(runs, run) } } From 6a24459009190a8c13b1be2a82696bcc03194119 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Mon, 24 Apr 2023 21:28:22 -0600 Subject: [PATCH 23/63] support prooving empty genesis block --- staker/challenge_manager.go | 3 --- staker/stateless_block_validator.go | 31 +++++++++++++---------------- system_tests/staker_test.go | 2 +- 3 files changed, 15 insertions(+), 21 deletions(-) diff --git a/staker/challenge_manager.go b/staker/challenge_manager.go index ae591c24e8..4609e85df6 100644 --- a/staker/challenge_manager.go +++ b/staker/challenge_manager.go @@ -458,9 +458,6 @@ func (m *ChallengeManager) createExecutionBackend(ctx context.Context, initialCo return nil } m.executionChallengeBackend = nil - if initialCount == 0 { - return errors.New("cannot validate before genesis block") - } entry, err := m.validator.CreateReadyValidationEntry(ctx, initialCount) if err != nil { return fmt.Errorf("error creating validation entry for challenge %v msg %v for execution challenge: %w", m.challengeIndex, initialCount, err) diff --git a/staker/stateless_block_validator.go b/staker/stateless_block_validator.go index 34015760e3..7cf62153c4 100644 --- a/staker/stateless_block_validator.go +++ b/staker/stateless_block_validator.go @@ -273,17 +273,19 @@ func (v *StatelessBlockValidator) ValidationEntryRecord(ctx context.Context, e * if e.Stage != ReadyForRecord { return errors.Errorf("validation entry should be ReadyForRecord, is: %v", e.Stage) } - // nothing to record for genesis - if e.Pos == 0 { - e.Stage = Ready - return nil - } - recording, err := v.recorder.RecordBlockCreation(ctx, e.Pos, e.msg) - if err != nil { - return err - } - if recording.BlockHash != e.End.BlockHash { - return fmt.Errorf("recording failed: pos %d, hash expected %v, got %v", e.Pos, e.End.BlockHash, recording.BlockHash) + if e.Pos != 0 { + recording, err := v.recorder.RecordBlockCreation(ctx, e.Pos, e.msg) + if err != nil { + return err + } + if recording.BlockHash != e.End.BlockHash { + return fmt.Errorf("recording failed: pos %d, hash expected %v, got %v", e.Pos, e.End.BlockHash, recording.BlockHash) + } + e.BatchInfo = append(e.BatchInfo, recording.BatchInfo...) + + if recording.Preimages != nil { + e.Preimages = recording.Preimages + } } if e.HasDelayedMsg { delayedMsg, err := v.inboxTracker.GetDelayedMessageBytes(e.DelayedMsgNr) @@ -296,12 +298,7 @@ func (v *StatelessBlockValidator) ValidationEntryRecord(ctx context.Context, e * } e.DelayedMsg = delayedMsg } - - e.BatchInfo = append(e.BatchInfo, recording.BatchInfo...) 
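The HasDelayedMsg/DelayedMsgNr fields consumed here follow the rule introduced earlier in the series: a single message may read at most one new delayed message, and when it does, the entry records that it needs delayed message number prevDelayed. A self-contained sketch of that bookkeeping, with bare uint64s standing in for the node's types:

    package main

    import (
        "errors"
        "fmt"
    )

    // delayedMsgToFetch mirrors the check on validation entries: given how many
    // delayed messages had been read before this message (prevDelayed) and
    // after it (delayedMessagesRead), it reports whether a delayed message must
    // be fetched and, if so, which one (0-based numbering).
    func delayedMsgToFetch(prevDelayed, delayedMessagesRead uint64) (bool, uint64, error) {
        switch delayedMessagesRead {
        case prevDelayed:
            return false, 0, nil // nothing new was read
        case prevDelayed + 1:
            return true, prevDelayed, nil // the newly read message is number prevDelayed
        default:
            return false, 0, errors.New("illegal delayed message count for a single message")
        }
    }

    func main() {
        fmt.Println(delayedMsgToFetch(4, 4)) // false 0 <nil>
        fmt.Println(delayedMsgToFetch(4, 5)) // true 4 <nil>
        fmt.Println(delayedMsgToFetch(4, 7)) // false 0 plus an error
    }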
- - if recording.Preimages != nil { - e.Preimages = recording.Preimages - } else { + if e.Preimages == nil { e.Preimages = make(map[common.Hash][]byte) } for _, batch := range e.BatchInfo { diff --git a/system_tests/staker_test.go b/system_tests/staker_test.go index ac2389c285..7352d9ff5d 100644 --- a/system_tests/staker_test.go +++ b/system_tests/staker_test.go @@ -275,7 +275,7 @@ func stakerTestImpl(t *testing.T, faultyStaker bool, honestStakerInactive bool) } if err != nil && faultyStaker && i%2 == 1 { // Check if this is an expected error from the faulty staker. - if strings.Contains(err.Error(), "agreed with entire challenge") || strings.Contains(err.Error(), "after block -1 expected global state") { + if strings.Contains(err.Error(), "agreed with entire challenge") || strings.Contains(err.Error(), "after msg 0 expected global state") { // Expected error upon realizing you're losing the challenge. Get ready for a timeout. if !challengeMangerTimedOut { // Upgrade the ChallengeManager contract to an implementation which says challenges are always timed out From 71bc2467d9e37cd38d05d23b8997e1b1673f3ce2 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Tue, 25 Apr 2023 20:20:37 -0600 Subject: [PATCH 24/63] staker tests: use empty genesis genesis must be empty to be provable, --- system_tests/arbtrace_test.go | 2 +- system_tests/common_test.go | 69 ++++++++++++++++++++++++++++++++++- system_tests/debugapi_test.go | 2 +- system_tests/staker_test.go | 14 ++++++- 4 files changed, 82 insertions(+), 5 deletions(-) diff --git a/system_tests/arbtrace_test.go b/system_tests/arbtrace_test.go index b877e85dff..d36b9b2950 100644 --- a/system_tests/arbtrace_test.go +++ b/system_tests/arbtrace_test.go @@ -148,7 +148,7 @@ func TestArbTraceForwarding(t *testing.T) { nodeConfig := arbnode.ConfigDefaultL1Test() nodeConfig.RPC.ClassicRedirect = ipcPath nodeConfig.RPC.ClassicRedirectTimeout = time.Second - _, _, _, l2stack, _, _, _, l1stack := createTestNodeOnL1WithConfigImpl(t, ctx, true, nodeConfig, nil, nil) + _, _, _, l2stack, _, _, _, l1stack := createTestNodeOnL1WithConfigImpl(t, ctx, true, nodeConfig, nil, nil, nil) defer requireClose(t, l1stack) defer requireClose(t, l2stack) diff --git a/system_tests/common_test.go b/system_tests/common_test.go index 07ee38a234..5b7503afbf 100644 --- a/system_tests/common_test.go +++ b/system_tests/common_test.go @@ -6,6 +6,7 @@ package arbtest import ( "bytes" "context" + "encoding/hex" "fmt" "math/big" "net" @@ -83,6 +84,65 @@ func TransferBalanceTo( return tx, res } +// if l2client is not nil - will wait until balance appears in l2 +func BridgeBalance( + t *testing.T, account string, amount *big.Int, l1info info, l2info info, l1client client, l2client client, ctx context.Context, +) (*types.Transaction, *types.Receipt) { + t.Helper() + + // setup or validate the same account on l2info + l1acct := l1info.GetInfoWithPrivKey(account) + if l2info.Accounts[account] == nil { + l2info.SetFullAccountInfo(account, &AccountInfo{ + Address: l1acct.Address, + PrivateKey: l1acct.PrivateKey, + Nonce: 0, + }) + } else { + l2acct := l2info.GetInfoWithPrivKey(account) + if l2acct.PrivateKey.X.Cmp(l1acct.PrivateKey.X) != 0 || + l2acct.PrivateKey.Y.Cmp(l1acct.PrivateKey.Y) != 0 { + Fail(t, "l2 account already exists and not compatible to l1") + } + } + + // check previous balance + var l2Balance *big.Int + var err error + if l2client != nil { + l2Balance, err = l2client.BalanceAt(ctx, l2info.GetAddress("Faucet"), nil) + Require(t, err) + } + + // send transaction + data, err := 
hex.DecodeString("0f4d14e9000000000000000000000000000000000000000000000000000082f79cd90000") + Require(t, err) + tx := l1info.PrepareTx(account, "Inbox", l1info.TransferGas*100, amount, data) + err = l1client.SendTransaction(ctx, tx) + Require(t, err) + res, err := EnsureTxSucceeded(ctx, l1client, tx) + Require(t, err) + + // wait for balance to appear in l2 + if l2client != nil { + l2Balance.Add(l2Balance, amount) + for i := 0; true; i++ { + balance, err := l2client.BalanceAt(ctx, l2info.GetAddress("Faucet"), nil) + Require(t, err) + if balance.Cmp(l2Balance) >= 0 { + break + } + TransferBalance(t, "Faucet", "User", big.NewInt(1), l1info, l1client, ctx) + if i > 20 { + Fail(t, "bridging failed") + } + <-time.After(time.Millisecond * 100) + } + } + + return tx, res +} + func SendSignedTxViaL1( t *testing.T, ctx context.Context, @@ -423,7 +483,7 @@ func createTestNodeOnL1WithConfig( l2info info, currentNode *arbnode.Node, l2client *ethclient.Client, l1info info, l1backend *eth.Ethereum, l1client *ethclient.Client, l1stack *node.Node, ) { - l2info, currentNode, l2client, _, l1info, l1backend, l1client, l1stack = createTestNodeOnL1WithConfigImpl(t, ctx, isSequencer, nodeConfig, chainConfig, stackConfig) + l2info, currentNode, l2client, _, l1info, l1backend, l1client, l1stack = createTestNodeOnL1WithConfigImpl(t, ctx, isSequencer, nodeConfig, chainConfig, stackConfig, nil) return } @@ -434,6 +494,7 @@ func createTestNodeOnL1WithConfigImpl( nodeConfig *arbnode.Config, chainConfig *params.ChainConfig, stackConfig *node.Config, + l2info_in info, ) ( l2info info, currentNode *arbnode.Node, l2client *ethclient.Client, l2stack *node.Node, l1info info, l1backend *eth.Ethereum, l1client *ethclient.Client, l1stack *node.Node, @@ -449,7 +510,11 @@ func createTestNodeOnL1WithConfigImpl( var l2chainDb ethdb.Database var l2arbDb ethdb.Database var l2blockchain *core.BlockChain - l2info, l2stack, l2chainDb, l2arbDb, l2blockchain = createL2BlockChainWithStackConfig(t, nil, "", chainConfig, stackConfig) + l2info = l2info_in + if l2info == nil { + l2info = NewArbTestInfo(t, chainConfig.ChainID) + } + _, l2stack, l2chainDb, l2arbDb, l2blockchain = createL2BlockChainWithStackConfig(t, l2info, "", chainConfig, stackConfig) addresses := DeployOnTestL1(t, ctx, l1info, l1client, chainConfig.ChainID) var sequencerTxOptsPtr *bind.TransactOpts var dataSigner signature.DataSignerFunc diff --git a/system_tests/debugapi_test.go b/system_tests/debugapi_test.go index df954685d1..03e3dfd405 100644 --- a/system_tests/debugapi_test.go +++ b/system_tests/debugapi_test.go @@ -14,7 +14,7 @@ import ( func TestDebugAPI(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - _, _, _, l2stack, _, _, _, l1stack := createTestNodeOnL1WithConfigImpl(t, ctx, true, nil, nil, nil) + _, _, _, l2stack, _, _, _, l1stack := createTestNodeOnL1WithConfigImpl(t, ctx, true, nil, nil, nil, nil) defer requireClose(t, l1stack) defer requireClose(t, l2stack) diff --git a/system_tests/staker_test.go b/system_tests/staker_test.go index 7352d9ff5d..f89994a919 100644 --- a/system_tests/staker_test.go +++ b/system_tests/staker_test.go @@ -18,15 +18,18 @@ import ( "github.com/ethereum/go-ethereum/accounts/abi/bind" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/params" "github.com/offchainlabs/nitro/arbnode" + "github.com/offchainlabs/nitro/arbos/l2pricing" 
"github.com/offchainlabs/nitro/arbutil" "github.com/offchainlabs/nitro/solgen/go/mocksgen" "github.com/offchainlabs/nitro/solgen/go/rollupgen" "github.com/offchainlabs/nitro/staker" + "github.com/offchainlabs/nitro/util" "github.com/offchainlabs/nitro/util/arbmath" "github.com/offchainlabs/nitro/util/colors" "github.com/offchainlabs/nitro/validator/valnode" @@ -65,7 +68,14 @@ func stakerTestImpl(t *testing.T, faultyStaker bool, honestStakerInactive bool) t.Parallel() ctx, cancelCtx := context.WithCancel(context.Background()) defer cancelCtx() - l2info, l2nodeA, l2clientA, l1info, _, l1client, l1stack := createTestNodeOnL1(t, ctx, true) + var transferGas = util.NormalizeL2GasForL1GasInitial(800_000, params.GWei) // include room for aggregator L1 costs + l2chainConfig := params.ArbitrumDevTestChainConfig() + l2info := NewBlockChainTestInfo( + t, + types.NewArbitrumSigner(types.NewLondonSigner(l2chainConfig.ChainID)), big.NewInt(l2pricing.InitialBaseFeeWei*2), + transferGas, + ) + _, l2nodeA, l2clientA, _, l1info, _, l1client, l1stack := createTestNodeOnL1WithConfigImpl(t, ctx, true, nil, l2chainConfig, nil, l2info) defer requireClose(t, l1stack) defer l2nodeA.StopAndWait() @@ -87,6 +97,8 @@ func stakerTestImpl(t *testing.T, faultyStaker bool, honestStakerInactive bool) } } + BridgeBalance(t, "Faucet", big.NewInt(1).Mul(big.NewInt(params.Ether), big.NewInt(10000)), l1info, l2info, l1client, l2clientA, ctx) + deployAuth := l1info.GetDefaultTransactOpts("RollupOwner", ctx) balance := big.NewInt(params.Ether) From 66219ff69d4aa39f2706d17d86ad0134a6cfdb5b Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Fri, 28 Apr 2023 19:31:55 -0600 Subject: [PATCH 25/63] seq_coordinator: only take over if all blocks were built --- arbnode/seq_coordinator.go | 47 ++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/arbnode/seq_coordinator.go b/arbnode/seq_coordinator.go index bd63e63d01..de5a73039b 100644 --- a/arbnode/seq_coordinator.go +++ b/arbnode/seq_coordinator.go @@ -619,30 +619,37 @@ func (c *SeqCoordinator) update(ctx context.Context) time.Duration { log.Error("myurl main sequencer, but no sequencer exists") return c.noRedisError() } - // we're here because we don't currently hold the lock - // sequencer is already either paused or forwarding - c.sequencer.Pause() - err := c.acquireLockoutAndWriteMessage(ctx, localMsgCount, localMsgCount, nil) + processedMessages, err := c.streamer.GetProcessedMessageCount() if err != nil { - // this could be just new messages we didn't get yet - even then, we should retry soon - log.Info("sequencer failed to become chosen", "err", err, "msgcount", localMsgCount) - // make sure we're marked as wanting the lockout - if err := c.wantsLockoutUpdate(ctx); err != nil { - log.Warn("failed to update wants lockout key", "err", err) - } - c.prevChosenSequencer = "" - return c.retryAfterRedisError() - } - log.Info("caught chosen-coordinator lock", "myUrl", c.config.MyUrl()) - if c.delayedSequencer != nil { - err = c.delayedSequencer.ForceSequenceDelayed(ctx) + log.Warn("coordinator: failed to read processed message count", "err", err) + processedMessages = 0 + } + if processedMessages >= localMsgCount { + // we're here because we don't currently hold the lock + // sequencer is already either paused or forwarding + c.sequencer.Pause() + err := c.acquireLockoutAndWriteMessage(ctx, localMsgCount, localMsgCount, nil) if err != nil { - log.Warn("failed sequencing delayed messages after catching lock", "err", err) + // this could be 
just new messages we didn't get yet - even then, we should retry soon + log.Info("sequencer failed to become chosen", "err", err, "msgcount", localMsgCount) + // make sure we're marked as wanting the lockout + if err := c.wantsLockoutUpdate(ctx); err != nil { + log.Warn("failed to update wants lockout key", "err", err) + } + c.prevChosenSequencer = "" + return c.retryAfterRedisError() } + log.Info("caught chosen-coordinator lock", "myUrl", c.config.MyUrl()) + if c.delayedSequencer != nil { + err = c.delayedSequencer.ForceSequenceDelayed(ctx) + if err != nil { + log.Warn("failed sequencing delayed messages after catching lock", "err", err) + } + } + c.sequencer.Activate() + c.prevChosenSequencer = c.config.MyUrl() + return c.noRedisError() } - c.sequencer.Activate() - c.prevChosenSequencer = c.config.MyUrl() - return c.noRedisError() } // update wanting the lockout From 78ff09c3e67b50f57884df342b49e7c6970a7221 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Thu, 25 May 2023 21:01:12 -0600 Subject: [PATCH 26/63] fix block_recording for new arbos --- arbnode/execution/block_recorder.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arbnode/execution/block_recorder.go b/arbnode/execution/block_recorder.go index 5152f33ebb..63a1070db3 100644 --- a/arbnode/execution/block_recorder.go +++ b/arbnode/execution/block_recorder.go @@ -115,6 +115,10 @@ func (r *BlockRecorder) RecordBlockCreation( if err != nil { return nil, fmt.Errorf("error getting genesis block number from initial ArbOS state: %w", err) } + _, err = initialArbosState.ChainConfig() + if err != nil { + return nil, fmt.Errorf("error getting chain config from initial ArbOS state: %w", err) + } expectedNum := chainConfig.ArbitrumChainParams.GenesisBlockNum if genesisNum != expectedNum { return nil, fmt.Errorf("unexpected genesis block number %v in ArbOS state, expected %v", genesisNum, expectedNum) From 472fec0ecde916197df03d283fde757597d8d9bd Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Wed, 14 Jun 2023 17:11:16 -0600 Subject: [PATCH 27/63] fix merge errors --- arbnode/api.go | 1 - system_tests/full_challenge_impl_test.go | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/arbnode/api.go b/arbnode/api.go index 6ca9eeefd7..89287b8d59 100644 --- a/arbnode/api.go +++ b/arbnode/api.go @@ -12,7 +12,6 @@ import ( "github.com/offchainlabs/nitro/arbutil" "github.com/offchainlabs/nitro/staker" "github.com/offchainlabs/nitro/validator" - "github.com/pkg/errors" ) type BlockValidatorAPI struct { diff --git a/system_tests/full_challenge_impl_test.go b/system_tests/full_challenge_impl_test.go index 9c1be71f3e..bf5221e5f7 100644 --- a/system_tests/full_challenge_impl_test.go +++ b/system_tests/full_challenge_impl_test.go @@ -288,7 +288,7 @@ func RunChallengeTest(t *testing.T, asserterIsCorrect bool, useStubs bool, chall challengerL2Info.SetFullAccountInfo("Destination", asserterL2Info.GetInfoWithPrivKey("Destination")) if challengeMsgIdx < 1 || challengeMsgIdx > 3*MsgPerBatch { - Fail(t, "challengeMsgIdx illegal") + Fatal(t, "challengeMsgIdx illegal") } // seqNum := common.Big2 @@ -437,10 +437,10 @@ func RunChallengeTest(t *testing.T, asserterIsCorrect bool, useStubs bool, chall if useStubs { if len(mockSpawn.ExecSpawned) != 0 { if len(mockSpawn.ExecSpawned) != 1 { - Fail(t, "bad number of spawned execRuns: ", len(mockSpawn.ExecSpawned)) + Fatal(t, "bad number of spawned execRuns: ", len(mockSpawn.ExecSpawned)) } if mockSpawn.ExecSpawned[0] != uint64(challengeMsgIdx) { - Fail(t, "wrong spawned execRuns: ", 
mockSpawn.ExecSpawned[0], " expected: ", challengeMsgIdx) + Fatal(t, "wrong spawned execRuns: ", mockSpawn.ExecSpawned[0], " expected: ", challengeMsgIdx) } return } From f5a189be6ca80e6a7f50f08daaaadbda1690694b Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Thu, 22 Jun 2023 19:35:53 -0600 Subject: [PATCH 28/63] block_validator: simplify preparing record --- staker/block_validator.go | 103 +++++++++++++++----------------------- 1 file changed, 40 insertions(+), 63 deletions(-) diff --git a/staker/block_validator.go b/staker/block_validator.go index a3a505045e..d12e1ce8e0 100644 --- a/staker/block_validator.go +++ b/staker/block_validator.go @@ -49,10 +49,6 @@ type BlockValidator struct { nextCreateStartGS validator.GoGlobalState nextCreatePrevDelayed uint64 - // only used by record loop or holding reorg-write - prepared arbutil.MessageIndex - nextRecordPrepared *containers.Promise[arbutil.MessageIndex] - // can only be accessed from from validation thread or if holding reorg-write lastValidGS validator.GoGlobalState valLoopPos arbutil.MessageIndex @@ -521,74 +517,58 @@ func (v *BlockValidator) iterativeValidationEntryCreator(ctx context.Context, ig return v.config().ValidationPoll } -func (v *BlockValidator) sendNextRecordPrepare() error { - if v.nextRecordPrepared != nil { - if v.nextRecordPrepared.Ready() { - prepared, err := v.nextRecordPrepared.Current() - if err != nil { - return err - } - if prepared > v.prepared { - v.prepared = prepared - } - v.nextRecordPrepared = nil - } else { - return nil - } - } - nextPrepared := v.validated() + arbutil.MessageIndex(v.config().PrerecordedBlocks) +func (v *BlockValidator) sendNextRecordRequests(ctx context.Context) (bool, error) { + v.reorgMutex.RLock() + pos := v.recordSent() created := v.created() - if nextPrepared > created { - nextPrepared = created - } - if v.prepared >= nextPrepared { - return nil - } - nextPromise := containers.NewPromise[arbutil.MessageIndex](nil) - v.LaunchThread(func(ctx context.Context) { - err := v.recorder.PrepareForRecord(ctx, v.prepared, nextPrepared-1) - if err != nil { - nextPromise.ProduceError(err) - } else { - nextPromise.Produce(nextPrepared) - nonBlockingTriger(v.sendRecordChan) - } - }) - v.nextRecordPrepared = &nextPromise - return nil -} + validated := v.validated() + v.reorgMutex.RUnlock() -func (v *BlockValidator) sendNextRecordRequest(ctx context.Context) (bool, error) { - v.reorgMutex.RLock() - defer v.reorgMutex.RUnlock() - err := v.sendNextRecordPrepare() - if err != nil { - return false, err + recordUntil := validated + arbutil.MessageIndex(v.config().PrerecordedBlocks) - 1 + if recordUntil > created-1 { + recordUntil = created - 1 } - pos := v.recordSent() - if pos >= v.prepared { - log.Trace("next record request: nothing to send", "pos", pos) + if recordUntil < pos { return false, nil } - validationStatus, found := v.validations.Load(pos) - if !found { - return false, fmt.Errorf("not found entry for pos %d", pos) - } - currentStatus := validationStatus.getStatus() - if currentStatus != Created { - return false, fmt.Errorf("bad status trying to send recordings for pos %d status: %v", pos, currentStatus) - } - err = v.sendRecord(validationStatus) + log.Trace("preparing to record", "pos", pos, "until", recordUntil) + // prepare could take a long time so we do it without a lock + err := v.recorder.PrepareForRecord(ctx, pos, recordUntil) if err != nil { return false, err } - atomicStorePos(&v.recordSentA, pos+1) - log.Trace("next record request: sent", "pos", pos) + + v.reorgMutex.RLock() + 
defer v.reorgMutex.RUnlock() + createdNew := v.created() + recordSentNew := v.recordSent() + if createdNew < created || recordSentNew < pos { + // there was a relevant reorg - quit and restart + return true, nil + } + for pos <= recordUntil { + validationStatus, found := v.validations.Load(pos) + if !found { + return false, fmt.Errorf("not found entry for pos %d", pos) + } + currentStatus := validationStatus.getStatus() + if currentStatus != Created { + return false, fmt.Errorf("bad status trying to send recordings for pos %d status: %v", pos, currentStatus) + } + err := v.sendRecord(validationStatus) + if err != nil { + return false, err + } + pos += 1 + atomicStorePos(&v.recordSentA, pos) + log.Trace("next record request: sent", "pos", pos) + } + return true, nil } func (v *BlockValidator) iterativeValidationEntryRecorder(ctx context.Context, ignored struct{}) time.Duration { - moreWork, err := v.sendNextRecordRequest(ctx) + moreWork, err := v.sendNextRecordRequests(ctx) if err != nil { log.Error("error trying to record for validation node", "err", err) } @@ -852,9 +832,6 @@ func (v *BlockValidator) Reorg(ctx context.Context, count arbutil.MessageIndex) log.Error("failed writing valid state after reorg", "err", err) } } - if v.prepared > count { - v.prepared = count - } nonBlockingTriger(v.createNodesChan) return nil } From 2bd6ac632349522fc7b788b733bf9e046305bec1 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Fri, 23 Jun 2023 10:53:39 -0600 Subject: [PATCH 29/63] block_validator: iterative print validated --- staker/block_validator.go | 41 ++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/staker/block_validator.go b/staker/block_validator.go index d12e1ce8e0..068612fe99 100644 --- a/staker/block_validator.go +++ b/staker/block_validator.go @@ -50,9 +50,11 @@ type BlockValidator struct { nextCreatePrevDelayed uint64 // can only be accessed from from validation thread or if holding reorg-write - lastValidGS validator.GoGlobalState - valLoopPos arbutil.MessageIndex - validInfoPrintTime time.Time + lastValidGS validator.GoGlobalState + valLoopPos arbutil.MessageIndex + + // only from logger thread + lastValidInfoPrinted *GlobalStateValidatedInfo // can be read by anyone holding reorg-read // written by appropriate thread or reorg-write @@ -578,13 +580,33 @@ func (v *BlockValidator) iterativeValidationEntryRecorder(ctx context.Context, i return v.config().ValidationPoll } -func (v *BlockValidator) maybePrintNewlyValid() { - if time.Since(v.validInfoPrintTime) > time.Second { - log.Info("result validated", "count", v.validated(), "blockHash", v.lastValidGS.BlockHash) - v.validInfoPrintTime = time.Now() +func (v *BlockValidator) iterativeValidationPrint(ctx context.Context) time.Duration { + validated, err := v.ReadLastValidatedInfo() + if err != nil { + log.Error("cannot read last validated data from database", "err", err) + return time.Second * 30 + } + if validated == nil { + return time.Second + } + if v.lastValidInfoPrinted != nil { + if v.lastValidInfoPrinted.GlobalState.BlockHash == validated.GlobalState.BlockHash { + return time.Second + } + } + var batchMsgs arbutil.MessageIndex + var printedCount int64 + if validated.GlobalState.Batch > 0 { + batchMsgs, err = v.inboxTracker.GetBatchMessageCount(validated.GlobalState.Batch) + } + if err != nil { + printedCount = -1 } else { - log.Trace("result validated", "count", v.validated(), "blockHash", v.lastValidGS.BlockHash) + printedCount = int64(batchMsgs) + 
int64(validated.GlobalState.PosInBatch) } + log.Info("validated execution", "messageCount", printedCount, "globalstate", validated.GlobalState, "WasmRoots", validated.WasmRoots) + v.lastValidInfoPrinted = validated + return time.Second } // return val: @@ -669,7 +691,7 @@ validatiosLoop: if v.testingProgressMadeChan != nil { nonBlockingTriger(v.testingProgressMadeChan) } - v.maybePrintNewlyValid() + log.Trace("result validated", "count", v.validated(), "blockHash", v.lastValidGS.BlockHash) continue } if room == 0 { @@ -893,6 +915,7 @@ func (v *BlockValidator) LaunchWorkthreadsWhenCaughtUp(ctx context.Context) { func (v *BlockValidator) Start(ctxIn context.Context) error { v.StopWaiter.Start(ctxIn, v) v.LaunchThread(v.LaunchWorkthreadsWhenCaughtUp) + v.CallIteratively(v.iterativeValidationPrint) return nil } From 6ea61f7886ef39c8ab911fd5fdfe9dccd667c052 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Fri, 23 Jun 2023 11:42:04 -0600 Subject: [PATCH 30/63] fix typos and some renaming --- arbnode/api.go | 2 +- arbnode/execution/block_recorder.go | 4 ++-- staker/block_validator.go | 28 ++++++++++++++-------------- staker/challenge_manager.go | 2 +- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/arbnode/api.go b/arbnode/api.go index 89287b8d59..d28d7481d9 100644 --- a/arbnode/api.go +++ b/arbnode/api.go @@ -18,7 +18,7 @@ type BlockValidatorAPI struct { val *staker.BlockValidator } -func (a *BlockValidatorAPI) LatestValidatedMsgNum(ctx context.Context) (*staker.GlobalStateValidatedInfo, error) { +func (a *BlockValidatorAPI) LatestValidated(ctx context.Context) (*staker.GlobalStateValidatedInfo, error) { return a.val.ReadLastValidatedInfo() } diff --git a/arbnode/execution/block_recorder.go b/arbnode/execution/block_recorder.go index 63a1070db3..79da88821c 100644 --- a/arbnode/execution/block_recorder.go +++ b/arbnode/execution/block_recorder.go @@ -19,10 +19,10 @@ import ( ) // BlockRecorder uses a separate statedatabase from the blockchain. -// It has access to any state in the HD database, and can compute state as needed. +// It has access to any state in the ethdb (hard-disk) database, and can compute state as needed. // We keep references for state of: // Any block that matches PrepareForRecord that was done recently (according to PrepareDelay config) -// Most recent/advanced header we ever omputed (lastHdr) +// Most recent/advanced header we ever computed (lastHdr) // Hopefully - some recent valid block. For that we always keep one candidate block until it becomes validated. 
type BlockRecorder struct { recordingDatabase *arbitrum.RecordingDatabase diff --git a/staker/block_validator.go b/staker/block_validator.go index 068612fe99..f32ca5ef65 100644 --- a/staker/block_validator.go +++ b/staker/block_validator.go @@ -250,7 +250,7 @@ func (v *BlockValidator) possiblyFatal(err error) { } } -func nonBlockingTriger(channel chan struct{}) { +func nonBlockingTrigger(channel chan struct{}) { select { case channel <- struct{}{}: default: @@ -329,7 +329,7 @@ func GlobalStateToMsgCount(tracker InboxTrackerInterface, streamer TransactionSt return true, count, nil } -func (v *BlockValidator) checkValidatedGSCaughUp(ctx context.Context) (bool, error) { +func (v *BlockValidator) checkValidatedGSCaughtUp(ctx context.Context) (bool, error) { v.reorgMutex.Lock() defer v.reorgMutex.Unlock() if v.chainCaughtUp { @@ -381,7 +381,7 @@ func (v *BlockValidator) sendRecord(s *validationStatus) error { log.Error("Fault trying to update validation with recording", "entry", s.Entry, "status", s.getStatus()) return } - nonBlockingTriger(v.progressValidationsChan) + nonBlockingTrigger(v.progressValidationsChan) }) return nil } @@ -627,7 +627,7 @@ func (v *BlockValidator) advanceValidations(ctx context.Context) (*arbutil.Messa } } pos := v.validated() - 1 // to reverse the first +1 in the loop -validatiosLoop: +validationsLoop: for { if ctx.Err() != nil { return nil, ctx.Err() @@ -660,7 +660,7 @@ validatiosLoop: for i, run := range validationStatus.Runs { if !run.Ready() { log.Trace("advanceValidations: validation not ready", "pos", pos, "run", i) - continue validatiosLoop + continue validationsLoop } wasmRoots = append(wasmRoots, run.WasmModuleRoot()) runEnd, err := run.Current() @@ -685,11 +685,11 @@ validatiosLoop: log.Error("failed writing new validated to database", "pos", pos, "err", err) } atomicStorePos(&v.validatedA, pos+1) - nonBlockingTriger(v.createNodesChan) - nonBlockingTriger(v.sendRecordChan) + nonBlockingTrigger(v.createNodesChan) + nonBlockingTrigger(v.sendRecordChan) validatorMsgCountValidatedGauge.Update(int64(pos + 1)) if v.testingProgressMadeChan != nil { - nonBlockingTriger(v.testingProgressMadeChan) + nonBlockingTrigger(v.testingProgressMadeChan) } log.Trace("result validated", "count", v.validated(), "blockHash", v.lastValidGS.BlockHash) continue @@ -735,7 +735,7 @@ validatiosLoop: return } } - nonBlockingTriger(v.progressValidationsChan) + nonBlockingTrigger(v.progressValidationsChan) }) room-- } @@ -854,7 +854,7 @@ func (v *BlockValidator) Reorg(ctx context.Context, count arbutil.MessageIndex) log.Error("failed writing valid state after reorg", "err", err) } } - nonBlockingTriger(v.createNodesChan) + nonBlockingTrigger(v.createNodesChan) return nil } @@ -885,7 +885,7 @@ func (v *BlockValidator) Initialize(ctx context.Context) error { func (v *BlockValidator) LaunchWorkthreadsWhenCaughtUp(ctx context.Context) { for { - caughtUp, err := v.checkValidatedGSCaughUp(ctx) + caughtUp, err := v.checkValidatedGSCaughtUp(ctx) if err != nil { log.Error("validator got error waiting for chain to catch up", "err", err) } @@ -925,8 +925,8 @@ func (v *BlockValidator) StopAndWait() { // WaitForPos can only be used from One thread func (v *BlockValidator) WaitForPos(t *testing.T, ctx context.Context, pos arbutil.MessageIndex, timeout time.Duration) bool { - trigerchan := make(chan struct{}) - v.testingProgressMadeChan = trigerchan + triggerchan := make(chan struct{}) + v.testingProgressMadeChan = triggerchan timer := time.NewTimer(timeout) defer timer.Stop() lastLoop := false @@ 
-940,7 +940,7 @@ func (v *BlockValidator) WaitForPos(t *testing.T, ctx context.Context, pos arbut select { case <-timer.C: lastLoop = true - case <-trigerchan: + case <-triggerchan: case <-ctx.Done(): lastLoop = true } diff --git a/staker/challenge_manager.go b/staker/challenge_manager.go index 1d3e2ca723..ac2ae8835a 100644 --- a/staker/challenge_manager.go +++ b/staker/challenge_manager.go @@ -560,7 +560,7 @@ func (m *ChallengeManager) Act(ctx context.Context) (*types.Transaction, error) return nil, fmt.Errorf("error creating execution backend: %w", err) } machineStepCount := m.machineFinalStepCount - log.Info("issuing one step proof", "challenge", m.challengeIndex, "machineStepCount", machineStepCount, "initial count", m.initialMachineMessageCount) + log.Info("issuing one step proof", "challenge", m.challengeIndex, "machineStepCount", machineStepCount, "initialCount", m.initialMachineMessageCount) return m.blockChallengeBackend.IssueExecChallenge( m.challengeCore, state, From 60e98222c53fa4dded5957d8c9ccd73934c1a545 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Fri, 23 Jun 2023 12:48:57 -0600 Subject: [PATCH 31/63] make reorg-to-block an init option --- arbnode/execution/blockchain.go | 16 ---------------- arbnode/node.go | 14 +------------- cmd/nitro/init.go | 3 +++ cmd/nitro/nitro.go | 21 ++++++++++++++++++--- 4 files changed, 22 insertions(+), 32 deletions(-) diff --git a/arbnode/execution/blockchain.go b/arbnode/execution/blockchain.go index 2ed0221b04..08b9aab305 100644 --- a/arbnode/execution/blockchain.go +++ b/arbnode/execution/blockchain.go @@ -187,22 +187,6 @@ func shouldPreserveFalse(_ *types.Header) bool { return false } -func ReorgToBlock(chain *core.BlockChain, blockNum uint64) (*types.Block, error) { - genesisNum := chain.Config().ArbitrumChainParams.GenesisBlockNum - if blockNum < genesisNum { - return nil, fmt.Errorf("cannot reorg to block %v past nitro genesis of %v", blockNum, genesisNum) - } - reorgingToBlock := chain.GetBlockByNumber(blockNum) - if reorgingToBlock == nil { - return nil, fmt.Errorf("didn't find reorg target block number %v", blockNum) - } - err := chain.ReorgToOldBlock(reorgingToBlock) - if err != nil { - return nil, err - } - return reorgingToBlock, nil -} - func init() { gethhook.RequireHookedGeth() } diff --git a/arbnode/node.go b/arbnode/node.go index 319f751772..4842a66b32 100644 --- a/arbnode/node.go +++ b/arbnode/node.go @@ -474,18 +474,15 @@ func ConfigDefaultL2Test() *Config { } type DangerousConfig struct { - NoL1Listener bool `koanf:"no-l1-listener"` - ReorgToBlock int64 `koanf:"reorg-to-block"` + NoL1Listener bool `koanf:"no-l1-listener"` } var DefaultDangerousConfig = DangerousConfig{ NoL1Listener: false, - ReorgToBlock: -1, } func DangerousConfigAddOptions(prefix string, f *flag.FlagSet) { f.Bool(prefix+".no-l1-listener", DefaultDangerousConfig.NoL1Listener, "DANGEROUS! disables listening to L1. To be used in test nodes only") - f.Int64(prefix+".reorg-to-block", DefaultDangerousConfig.ReorgToBlock, "DANGEROUS! forces a reorg to an old block height. To be used for testing only. 
-1 to disable") } type Node struct { @@ -585,15 +582,6 @@ func createNodeImpl( l2Config := l2BlockChain.Config() l2ChainId := l2Config.ChainID.Uint64() - //TODO: - // var reorgingToBlock *types.Block - if config.Dangerous.ReorgToBlock >= 0 { - _, err = execution.ReorgToBlock(l2BlockChain, uint64(config.Dangerous.ReorgToBlock)) - if err != nil { - return nil, err - } - } - syncMonitor := NewSyncMonitor(&config.SyncMonitor) var classicOutbox *ClassicOutboxRetriever classicMsgDb, err := stack.OpenDatabase("classic-msg", 0, 0, "", true) diff --git a/cmd/nitro/init.go b/cmd/nitro/init.go index cbaadd3abc..8623e62bb9 100644 --- a/cmd/nitro/init.go +++ b/cmd/nitro/init.go @@ -58,6 +58,7 @@ type InitConfig struct { ThenQuit bool `koanf:"then-quit"` Prune string `koanf:"prune"` PruneBloomSize uint64 `koanf:"prune-bloom-size"` + ResetToMsg int64 `koanf:"reset-to-message"` } var InitConfigDefault = InitConfig{ @@ -73,6 +74,7 @@ var InitConfigDefault = InitConfig{ ThenQuit: false, Prune: "", PruneBloomSize: 2048, + ResetToMsg: -1, } func InitConfigAddOptions(prefix string, f *flag.FlagSet) { @@ -89,6 +91,7 @@ func InitConfigAddOptions(prefix string, f *flag.FlagSet) { f.Uint(prefix+".accounts-per-sync", InitConfigDefault.AccountsPerSync, "during init - sync database every X accounts. Lower value for low-memory systems. 0 disables.") f.String(prefix+".prune", InitConfigDefault.Prune, "pruning for a given use: \"full\" for full nodes serving RPC requests, or \"validator\" for validators") f.Uint64(prefix+".prune-bloom-size", InitConfigDefault.PruneBloomSize, "the amount of memory in megabytes to use for the pruning bloom filter (higher values prune better)") + f.Int64(prefix+".reset-to-message", InitConfigDefault.ResetToMsg, "forces a reset to an old message height. 
Also set max-reorg-resequence-depth=0 to force re-reading messages") } func downloadInit(ctx context.Context, initConfig *InitConfig) (string, error) { diff --git a/cmd/nitro/nitro.go b/cmd/nitro/nitro.go index 0035171078..0eeb6cfade 100644 --- a/cmd/nitro/nitro.go +++ b/cmd/nitro/nitro.go @@ -38,6 +38,7 @@ import ( "github.com/offchainlabs/nitro/arbnode" "github.com/offchainlabs/nitro/arbnode/execution" + "github.com/offchainlabs/nitro/arbutil" "github.com/offchainlabs/nitro/cmd/chaininfo" "github.com/offchainlabs/nitro/cmd/conf" "github.com/offchainlabs/nitro/cmd/genericconf" @@ -356,7 +357,7 @@ func mainImpl() int { return 1 } - if nodeConfig.Init.ThenQuit { + if nodeConfig.Init.ThenQuit && nodeConfig.Init.ResetToMsg < 0 { return 0 } @@ -458,6 +459,8 @@ func mainImpl() int { err = currentNode.Start(ctx) if err != nil { fatalErrChan <- fmt.Errorf("error starting node: %w", err) + } else { + defer currentNode.StopAndWait() } } @@ -465,6 +468,20 @@ func mainImpl() int { signal.Notify(sigint, os.Interrupt, syscall.SIGTERM) exitCode := 0 + + if err == nil && nodeConfig.Init.ResetToMsg > 0 { + err = currentNode.TxStreamer.ReorgTo(arbutil.MessageIndex(nodeConfig.Init.ResetToMsg)) + if err != nil { + fatalErrChan <- fmt.Errorf("error reseting message: %w", err) + exitCode = 1 + } + if nodeConfig.Init.ThenQuit { + close(sigint) + + return exitCode + } + } + select { case err := <-fatalErrChan: log.Error("shutting down due to fatal error", "err", err) @@ -477,8 +494,6 @@ func mainImpl() int { // cause future ctrl+c's to panic close(sigint) - currentNode.StopAndWait() - return exitCode } From e1e6cda086d59e834d3e5fd155d10b092d7ca8f4 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Fri, 23 Jun 2023 13:23:20 -0600 Subject: [PATCH 32/63] renames and comment fixes --- staker/stateless_block_validator.go | 2 +- system_tests/full_challenge_impl_test.go | 14 +++++++------- system_tests/full_challenge_test.go | 4 ++-- system_tests/twonodeslong_test.go | 1 + 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/staker/stateless_block_validator.go b/staker/stateless_block_validator.go index 1e3cd7da62..94090092ef 100644 --- a/staker/stateless_block_validator.go +++ b/staker/stateless_block_validator.go @@ -174,7 +174,7 @@ type validationEntry struct { msg *arbostypes.MessageWithMetadata // Has batch when created - others could be added on record BatchInfo []validator.BatchInfo - // Valid since Recorded + // Valid since Ready Preimages map[common.Hash][]byte DelayedMsg []byte } diff --git a/system_tests/full_challenge_impl_test.go b/system_tests/full_challenge_impl_test.go index bf5221e5f7..0d44d69625 100644 --- a/system_tests/full_challenge_impl_test.go +++ b/system_tests/full_challenge_impl_test.go @@ -142,13 +142,13 @@ func writeTxToBatch(writer io.Writer, tx *types.Transaction) error { return err } -const MsgPerBatch = int64(5) +const makeBatch_MsgsPerBatch = int64(5) func makeBatch(t *testing.T, l2Node *arbnode.Node, l2Info *BlockchainTestInfo, backend *ethclient.Client, sequencer *bind.TransactOpts, seqInbox *mocksgen.SequencerInboxStub, seqInboxAddr common.Address, modStep int64) { ctx := context.Background() batchBuffer := bytes.NewBuffer([]byte{}) - for i := int64(0); i < MsgPerBatch; i++ { + for i := int64(0); i < makeBatch_MsgsPerBatch; i++ { value := i if i == modStep { value++ @@ -287,7 +287,7 @@ func RunChallengeTest(t *testing.T, asserterIsCorrect bool, useStubs bool, chall asserterL2Info.GenerateAccount("Destination") challengerL2Info.SetFullAccountInfo("Destination", 
asserterL2Info.GetInfoWithPrivKey("Destination")) - if challengeMsgIdx < 1 || challengeMsgIdx > 3*MsgPerBatch { + if challengeMsgIdx < 1 || challengeMsgIdx > 3*makeBatch_MsgsPerBatch { Fatal(t, "challengeMsgIdx illegal") } @@ -297,11 +297,11 @@ func RunChallengeTest(t *testing.T, asserterIsCorrect bool, useStubs bool, chall // seqNum.Add(seqNum, common.Big1) makeBatch(t, asserterL2, asserterL2Info, l1Backend, &sequencerTxOpts, asserterSeqInbox, asserterSeqInboxAddr, -1) - makeBatch(t, challengerL2, challengerL2Info, l1Backend, &sequencerTxOpts, challengerSeqInbox, challengerSeqInboxAddr, challengeMsgIdx-MsgPerBatch-1) + makeBatch(t, challengerL2, challengerL2Info, l1Backend, &sequencerTxOpts, challengerSeqInbox, challengerSeqInboxAddr, challengeMsgIdx-makeBatch_MsgsPerBatch-1) // seqNum.Add(seqNum, common.Big1) makeBatch(t, asserterL2, asserterL2Info, l1Backend, &sequencerTxOpts, asserterSeqInbox, asserterSeqInboxAddr, -1) - makeBatch(t, challengerL2, challengerL2Info, l1Backend, &sequencerTxOpts, challengerSeqInbox, challengerSeqInboxAddr, challengeMsgIdx-MsgPerBatch*2-1) + makeBatch(t, challengerL2, challengerL2Info, l1Backend, &sequencerTxOpts, challengerSeqInbox, challengerSeqInboxAddr, challengeMsgIdx-makeBatch_MsgsPerBatch*2-1) trueSeqInboxAddr := challengerSeqInboxAddr trueDelayedBridge := challengerBridgeAddr @@ -474,14 +474,14 @@ func RunChallengeTest(t *testing.T, asserterIsCorrect bool, useStubs bool, chall func TestMockChallengeManagerAsserterIncorrect(t *testing.T) { t.Parallel() - for i := int64(1); i <= MsgPerBatch*3; i++ { + for i := int64(1); i <= makeBatch_MsgsPerBatch*3; i++ { RunChallengeTest(t, false, true, i) } } func TestMockChallengeManagerAsserterCorrect(t *testing.T) { t.Parallel() - for i := int64(1); i <= MsgPerBatch*3; i++ { + for i := int64(1); i <= makeBatch_MsgsPerBatch*3; i++ { RunChallengeTest(t, true, true, i) } } diff --git a/system_tests/full_challenge_test.go b/system_tests/full_challenge_test.go index c297013aba..a960e7f640 100644 --- a/system_tests/full_challenge_test.go +++ b/system_tests/full_challenge_test.go @@ -16,10 +16,10 @@ import ( func TestChallengeManagerFullAsserterIncorrect(t *testing.T) { t.Parallel() - RunChallengeTest(t, false, false, MsgPerBatch+1) + RunChallengeTest(t, false, false, makeBatch_MsgsPerBatch+1) } func TestChallengeManagerFullAsserterCorrect(t *testing.T) { t.Parallel() - RunChallengeTest(t, true, false, MsgPerBatch+2) + RunChallengeTest(t, true, false, makeBatch_MsgsPerBatch+2) } diff --git a/system_tests/twonodeslong_test.go b/system_tests/twonodeslong_test.go index d44ce5809e..3987e5cf7b 100644 --- a/system_tests/twonodeslong_test.go +++ b/system_tests/twonodeslong_test.go @@ -174,6 +174,7 @@ func testTwoNodesLong(t *testing.T, dasModeStr string) { lastBlockHeader, err := l2clientB.HeaderByNumber(ctx, nil) Require(t, err) timeout := getDeadlineTimeout(t, time.Minute*30) + // messageindex is same as block number here if !nodeB.BlockValidator.WaitForPos(t, ctx, arbutil.MessageIndex(lastBlockHeader.Number.Uint64()), timeout) { Fatal(t, "did not validate all blocks") } From c1994bb777bdaeceef5ce7f7ee09e2c7f46de86a Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Fri, 23 Jun 2023 13:33:35 -0600 Subject: [PATCH 33/63] fix GlobalPositionAtCount --- staker/stateless_block_validator.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/staker/stateless_block_validator.go b/staker/stateless_block_validator.go index 94090092ef..0242daa3c7 100644 --- a/staker/stateless_block_validator.go +++ 
b/staker/stateless_block_validator.go @@ -91,6 +91,8 @@ type GlobalStatePosition struct { PosInBatch uint64 } +// return the globalState position before and after processing message at the specified count +// batch-number must be provided by caller func GlobalStatePositionsAtCount( tracker InboxTrackerInterface, count arbutil.MessageIndex, @@ -329,8 +331,12 @@ func buildGlobalState(res execution.MessageResult, pos GlobalStatePosition) vali } } +// return the globalState position before and after processing message at the specified count func (v *StatelessBlockValidator) GlobalStatePositionsAtCount(count arbutil.MessageIndex) (GlobalStatePosition, GlobalStatePosition, error) { if count == 0 { + return GlobalStatePosition{}, GlobalStatePosition{}, errors.New("no initial state for count==0") + } + if count == 1 { return GlobalStatePosition{}, GlobalStatePosition{1, 0}, nil } batchCount, err := v.inboxTracker.GetBatchCount() From 11689fd37796fc83cd8ab6139f0517208861b241 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Mon, 26 Jun 2023 11:35:15 -0600 Subject: [PATCH 34/63] l1_validator fixes --- staker/block_validator.go | 6 +++++- staker/l1_validator.go | 36 +++++++++++++++++++++++++----------- system_tests/staker_test.go | 2 +- 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/staker/block_validator.go b/staker/block_validator.go index f32ca5ef65..b3653e465a 100644 --- a/staker/block_validator.go +++ b/staker/block_validator.go @@ -291,7 +291,11 @@ func GlobalStateToMsgCount(tracker InboxTrackerInterface, streamer TransactionSt if err != nil { return false, 0, err } - if batchCount <= gs.Batch { + requiredBatchCount := gs.Batch + 1 + if gs.PosInBatch == 0 { + requiredBatchCount -= 1 + } + if batchCount < requiredBatchCount { return false, 0, nil } var prevBatchMsgCount arbutil.MessageIndex diff --git a/staker/l1_validator.go b/staker/l1_validator.go index bfb0126128..7d4dcdddf8 100644 --- a/staker/l1_validator.go +++ b/staker/l1_validator.go @@ -239,7 +239,7 @@ func (v *L1Validator) generateNodeAction(ctx context.Context, stakerInfo *OurSta caughtUp, startCount, err := GlobalStateToMsgCount(v.inboxTracker, v.txStreamer, startState.GlobalState) if err != nil { - return nil, false, err + return nil, false, fmt.Errorf("start state not in chain: %w", err) } if !caughtUp { target := GlobalStatePosition{ @@ -359,31 +359,44 @@ func (v *L1Validator) generateNodeAction(ctx context.Context, stakerInfo *OurSta wrongNodesExist = true continue } + afterGS := nd.AfterState().GlobalState + requiredBatch := afterGS.Batch + if afterGS.PosInBatch == 0 && afterGS.Batch > 0 { + requiredBatch -= 1 + } + if localBatchCount <= requiredBatch { + log.Info("staker: waiting for node to catch up to assertion batch", "current", localBatchCount, "target", requiredBatch-1) + return nil, false, nil + } + nodeBatchMsgCount, err := v.inboxTracker.GetBatchMessageCount(requiredBatch) + if err != nil { + return nil, false, err + } + if validatedCount < nodeBatchMsgCount { + log.Info("staker: waiting for validator to catch up to assertion batch messages", "current", validatedCount, "target", nodeBatchMsgCount) + return nil, false, nil + } if nd.Assertion.AfterState.MachineStatus != validator.MachineStatusFinished { wrongNodesExist = true log.Error("Found incorrect assertion: Machine status not finished", "node", nd.NodeNum, "machineStatus", nd.Assertion.AfterState.MachineStatus) continue } - afterGS := nd.AfterState().GlobalState caughtUp, nodeMsgCount, err := GlobalStateToMsgCount(v.inboxTracker, v.txStreamer, 
afterGS) if errors.Is(err, ErrGlobalStateNotInChain) { wrongNodesExist = true - log.Error("Found incorrect assertion", "node", nd.NodeNum, "err", err) + log.Error("Found incorrect assertion", "node", nd.NodeNum, "afterGS", afterGS, "err", err) continue } if err != nil { - return nil, false, fmt.Errorf("error getting block number from global state: %w", err) + return nil, false, fmt.Errorf("error getting message number from global state: %w", err) } if !caughtUp { - return nil, false, fmt.Errorf("waiting for node to catch up to assertion blocks. Current: %d target: %v", validatedCount, afterGS) - } - if validatedCount < nodeMsgCount { - return nil, false, fmt.Errorf("waiting for validator to catch up to assertion blocks. %d / %d", validatedCount, nodeMsgCount) + return nil, false, fmt.Errorf("unexpected no-caught-up parsing assertion. Current: %d target: %v", validatedCount, afterGS) } log.Info( "found correct assertion", "node", nd.NodeNum, - "count", validatedCount, + "count", nodeMsgCount, "blockHash", afterGS.BlockHash, ) correctNode = existingNodeAction{ @@ -425,13 +438,14 @@ func (v *L1Validator) createNewNodeAction( if !prevInboxMaxCount.IsUint64() { return nil, fmt.Errorf("inbox max count %v isn't a uint64", prevInboxMaxCount) } - if validatedCount < startCount { + if validatedCount <= startCount { // we haven't validated any new blocks return nil, nil } if validatedGS.Batch < prevInboxMaxCount.Uint64() { // didn't validate enough batches - return nil, fmt.Errorf("waiting for validator to validate enough batches %d/%d", validatedGS.Batch, prevInboxMaxCount) + log.Info("staker: not enough batches validated to create new assertion", "validated.Batch", validatedGS.Batch, "posInBatch", validatedGS.PosInBatch, "required batch", prevInboxMaxCount) + return nil, nil } batchValidated := validatedGS.Batch if validatedGS.PosInBatch == 0 { diff --git a/system_tests/staker_test.go b/system_tests/staker_test.go index 4653fa8c87..4e2091ac64 100644 --- a/system_tests/staker_test.go +++ b/system_tests/staker_test.go @@ -316,7 +316,7 @@ func stakerTestImpl(t *testing.T, faultyStaker bool, honestStakerInactive bool) } } else if strings.Contains(err.Error(), "insufficient funds") && sawStakerZombie { // Expected error when trying to re-stake after losing initial stake. - } else if strings.Contains(err.Error(), "unknown start block hash") && sawStakerZombie { + } else if strings.Contains(err.Error(), "start state not in chain") && sawStakerZombie { // Expected error when trying to re-stake after the challenger's nodes getting confirmed. } else if strings.Contains(err.Error(), "STAKER_IS_ZOMBIE") && sawStakerZombie { // Expected error when the staker is a zombie and thus can't advance its stake. 
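
The two hunks in this patch encode the same rule: a global state whose PosInBatch is zero sits at the end of the previous batch, so one fewer batch has to be read locally before that state can be resolved to a message count. A minimal standalone sketch of that rule, reusing the real validator.GoGlobalState type; the helper name and the sample values are illustrative only, not part of the patch:

package main

import (
	"fmt"

	"github.com/offchainlabs/nitro/validator"
)

// requiredBatches mirrors the check used in GlobalStateToMsgCount and
// generateNodeAction: a state with PosInBatch == 0 refers to the end of the
// previous batch, so one fewer batch must be present locally.
func requiredBatches(gs validator.GoGlobalState) uint64 {
	required := gs.Batch + 1
	if gs.PosInBatch == 0 && gs.Batch > 0 {
		required -= 1
	}
	return required
}

func main() {
	boundary := validator.GoGlobalState{Batch: 5, PosInBatch: 0}  // at the end of batch 4
	inside := validator.GoGlobalState{Batch: 5, PosInBatch: 12}   // inside batch 5
	fmt.Println(requiredBatches(boundary)) // 5
	fmt.Println(requiredBatches(inside))   // 6
}
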
From ab70066d3d38dbc6b76b3e9ee2feb52c36c4090b Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Mon, 26 Jun 2023 12:30:46 -0600 Subject: [PATCH 35/63] block validator: support legacy lastValidated db --- staker/block_validator.go | 163 ++++++++++++++++++++++++------- staker/block_validator_schema.go | 16 ++- 2 files changed, 134 insertions(+), 45 deletions(-) diff --git a/staker/block_validator.go b/staker/block_validator.go index b3653e465a..b8f8424226 100644 --- a/staker/block_validator.go +++ b/staker/block_validator.go @@ -50,8 +50,9 @@ type BlockValidator struct { nextCreatePrevDelayed uint64 // can only be accessed from from validation thread or if holding reorg-write - lastValidGS validator.GoGlobalState - valLoopPos arbutil.MessageIndex + lastValidGS validator.GoGlobalState + valLoopPos arbutil.MessageIndex + legacyValidInfo *legacyLastBlockValidatedDbInfo // only from logger thread lastValidInfoPrinted *GlobalStateValidatedInfo @@ -190,10 +191,16 @@ func NewBlockValidator( } if validated != nil { ret.lastValidGS = validated.GlobalState + } else { + legacyInfo, err := ret.legacyReadLastValidatedInfo() + if err != nil { + return nil, err + } + ret.legacyValidInfo = legacyInfo } } // genesis block is impossible to validate unless genesis state is empty - if ret.lastValidGS.Batch == 0 { + if ret.lastValidGS.Batch == 0 && ret.legacyValidInfo == nil { genesis, err := streamer.ResultAtCount(1) if err != nil { return nil, err @@ -282,6 +289,26 @@ func (v *BlockValidator) ReadLastValidatedInfo() (*GlobalStateValidatedInfo, err return ReadLastValidatedInfo(v.db) } +func (v *BlockValidator) legacyReadLastValidatedInfo() (*legacyLastBlockValidatedDbInfo, error) { + exists, err := v.db.Has(legacyLastBlockValidatedInfoKey) + if err != nil { + return nil, err + } + var validated legacyLastBlockValidatedDbInfo + if !exists { + return nil, nil + } + gsBytes, err := v.db.Get(legacyLastBlockValidatedInfoKey) + if err != nil { + return nil, err + } + err = rlp.DecodeBytes(gsBytes, &validated) + if err != nil { + return nil, err + } + return &validated, nil +} + var ErrGlobalStateNotInChain = errors.New("globalstate not in chain") // false if chain not caught up to globalstate @@ -333,37 +360,6 @@ func GlobalStateToMsgCount(tracker InboxTrackerInterface, streamer TransactionSt return true, count, nil } -func (v *BlockValidator) checkValidatedGSCaughtUp(ctx context.Context) (bool, error) { - v.reorgMutex.Lock() - defer v.reorgMutex.Unlock() - if v.chainCaughtUp { - return true, nil - } - if v.lastValidGS.Batch == 0 { - return false, errors.New("lastValid not initialized. 
cannot validate genesis") - } - caughtUp, count, err := GlobalStateToMsgCount(v.inboxTracker, v.streamer, v.lastValidGS) - if err != nil { - return false, err - } - if !caughtUp { - return false, nil - } - msg, err := v.streamer.GetMessage(count - 1) - if err != nil { - return false, err - } - v.nextCreateBatchReread = true - v.nextCreateStartGS = v.lastValidGS - v.nextCreatePrevDelayed = msg.DelayedMessagesRead - atomicStorePos(&v.createdA, count) - atomicStorePos(&v.recordSentA, count) - atomicStorePos(&v.validatedA, count) - validatorMsgCountValidatedGauge.Update(int64(count)) - v.chainCaughtUp = true - return true, nil -} - func (v *BlockValidator) sendRecord(s *validationStatus) error { if !v.Started() { return nil @@ -887,9 +883,106 @@ func (v *BlockValidator) Initialize(ctx context.Context) error { return nil } +func (v *BlockValidator) checkLegacyValid() error { + v.reorgMutex.Lock() + defer v.reorgMutex.Unlock() + if v.legacyValidInfo == nil { + return nil + } + batchCount, err := v.inboxTracker.GetBatchCount() + if err != nil { + return err + } + requiredBatchCount := v.legacyValidInfo.AfterPosition.BatchNumber + 1 + if v.legacyValidInfo.AfterPosition.PosInBatch == 0 { + requiredBatchCount -= 1 + } + if batchCount < requiredBatchCount { + // waiting to read more batches + return nil + } + msgCount, err := v.inboxTracker.GetBatchMessageCount(v.legacyValidInfo.AfterPosition.BatchNumber) + if err != nil { + return err + } + msgCount += arbutil.MessageIndex(v.legacyValidInfo.AfterPosition.PosInBatch) + processedCount, err := v.streamer.GetProcessedMessageCount() + if err != nil { + return err + } + if processedCount < msgCount { + // waiting to process more messages + return nil + } + result, err := v.streamer.ResultAtCount(msgCount) + if err != nil { + return err + } + if result.BlockHash != v.legacyValidInfo.BlockHash { + log.Error("legacy validated blockHash does not fit chain", "info.BlockHash", v.legacyValidInfo.BlockHash, "chain", result.BlockHash, "count", msgCount) + return fmt.Errorf("legacy validated blockHash does not fit chain") + } + v.lastValidGS = validator.GoGlobalState{ + BlockHash: result.BlockHash, + SendRoot: result.SendRoot, + Batch: v.legacyValidInfo.AfterPosition.BatchNumber, + PosInBatch: v.legacyValidInfo.AfterPosition.PosInBatch, + } + err = v.writeLastValidatedToDb(v.lastValidGS, []common.Hash{}) + if err == nil { + err = v.db.Delete(legacyLastBlockValidatedInfoKey) + if err != nil { + err = fmt.Errorf("deleting legacy: %w", err) + } + } + if err != nil { + log.Error("failed writing initial lastValid on upgrade from legacy", "new-info", v.lastValidGS, "err", err) + } else { + log.Info("updated last-valid from legacy", "lastValid", v.lastValidGS) + } + v.legacyValidInfo = nil + return nil +} + +// checks that the chain caught up to lastValidGS, used in startup +func (v *BlockValidator) checkValidatedGSCaughtUp() (bool, error) { + v.reorgMutex.Lock() + defer v.reorgMutex.Unlock() + if v.chainCaughtUp { + return true, nil + } + if v.lastValidGS.Batch == 0 { + return false, errors.New("lastValid not initialized. 
cannot validate genesis") + } + caughtUp, count, err := GlobalStateToMsgCount(v.inboxTracker, v.streamer, v.lastValidGS) + if err != nil { + return false, err + } + if !caughtUp { + return false, nil + } + msg, err := v.streamer.GetMessage(count - 1) + if err != nil { + return false, err + } + v.nextCreateBatchReread = true + v.nextCreateStartGS = v.lastValidGS + v.nextCreatePrevDelayed = msg.DelayedMessagesRead + atomicStorePos(&v.createdA, count) + atomicStorePos(&v.recordSentA, count) + atomicStorePos(&v.validatedA, count) + validatorMsgCountValidatedGauge.Update(int64(count)) + v.chainCaughtUp = true + return true, nil +} + func (v *BlockValidator) LaunchWorkthreadsWhenCaughtUp(ctx context.Context) { for { - caughtUp, err := v.checkValidatedGSCaughtUp(ctx) + err := v.checkLegacyValid() + if err != nil { + log.Error("validator got error updating legacy validated info", "err", err) + } + caughtUp, err := v.checkValidatedGSCaughtUp() if err != nil { log.Error("validator got error waiting for chain to catch up", "err", err) } diff --git a/staker/block_validator_schema.go b/staker/block_validator_schema.go index 6939dd05b2..f6eb39f015 100644 --- a/staker/block_validator_schema.go +++ b/staker/block_validator_schema.go @@ -8,15 +8,11 @@ import ( "github.com/offchainlabs/nitro/validator" ) -// Todo: we could create an upgrade scheme for moving from lastMessageValidated to lastBlockValidated -// not a must, since even without this index, we'll start validation from last assertion made -// the other option is to remove lastBlockValidated* from code - -// type legacyLastBlockValidatedDbInfo struct { -// BlockNumber uint64 -// BlockHash common.Hash -// AfterPosition GlobalStatePosition -// } +type legacyLastBlockValidatedDbInfo struct { + BlockNumber uint64 + BlockHash common.Hash + AfterPosition GlobalStatePosition +} type GlobalStateValidatedInfo struct { GlobalState validator.GoGlobalState @@ -25,5 +21,5 @@ type GlobalStateValidatedInfo struct { var ( lastGlobalStateValidatedInfoKey = []byte("_lastGlobalStateValidatedInfo") // contains a rlp encoded lastBlockValidatedDbInfo - // legacyLastBlockValidatedInfoKey = []byte("_lastBlockValidatedInfo") // contains a rlp encoded lastBlockValidatedDbInfo + legacyLastBlockValidatedInfoKey = []byte("_lastBlockValidatedInfo") // LEGACY - contains a rlp encoded lastBlockValidatedDbInfo ) From ce29aaef1b1fa5edd35d2399a62c18c3c5e58db0 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Mon, 26 Jun 2023 15:36:58 -0600 Subject: [PATCH 36/63] warn when catchin up to last valid --- staker/block_validator.go | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/staker/block_validator.go b/staker/block_validator.go index b8f8424226..5e980e12f3 100644 --- a/staker/block_validator.go +++ b/staker/block_validator.go @@ -898,7 +898,7 @@ func (v *BlockValidator) checkLegacyValid() error { requiredBatchCount -= 1 } if batchCount < requiredBatchCount { - // waiting to read more batches + log.Warn("legacy valid batch ahead of db", "current", batchCount, "required", requiredBatchCount) return nil } msgCount, err := v.inboxTracker.GetBatchMessageCount(v.legacyValidInfo.AfterPosition.BatchNumber) @@ -911,7 +911,7 @@ func (v *BlockValidator) checkLegacyValid() error { return err } if processedCount < msgCount { - // waiting to process more messages + log.Warn("legacy valid message count ahead of db", "current", processedCount, "required", msgCount) return nil } result, err := v.streamer.ResultAtCount(msgCount) @@ -959,6 +959,22 @@ func 
(v *BlockValidator) checkValidatedGSCaughtUp() (bool, error) { return false, err } if !caughtUp { + batchCount, err := v.inboxTracker.GetBatchCount() + if err != nil { + log.Error("failed reading batch count", "err", err) + batchCount = 0 + } + batchMsgCount, err := v.inboxTracker.GetBatchMessageCount(batchCount) + if err != nil { + log.Error("failed reading batchMsgCount", "err", err) + batchMsgCount = 0 + } + processedMsgCount, err := v.streamer.GetProcessedMessageCount() + if err != nil { + log.Error("failed reading processedMsgCount", "err", err) + processedMsgCount = 0 + } + log.Warn("validator catching up to last valid", "lastValid.Batch", v.lastValidGS.Batch, "lastValid.PosInBatch", v.lastValidGS.PosInBatch, "batchCount", batchCount, "batchMsgCount", batchMsgCount, "processedMsgCount", processedMsgCount) return false, nil } msg, err := v.streamer.GetMessage(count - 1) @@ -980,11 +996,11 @@ func (v *BlockValidator) LaunchWorkthreadsWhenCaughtUp(ctx context.Context) { for { err := v.checkLegacyValid() if err != nil { - log.Error("validator got error updating legacy validated info", "err", err) + log.Error("validator got error updating legacy validated info. Consider restarting with dangerous.reset-block-validation", "err", err) } caughtUp, err := v.checkValidatedGSCaughtUp() if err != nil { - log.Error("validator got error waiting for chain to catch up", "err", err) + log.Error("validator got error waiting for chain to catch up. Consider restarting with dangerous.reset-block-validation", "err", err) } if caughtUp { break From a20f5bac4f9f538071ab9d0569151245bfc8d1ca Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Tue, 27 Jun 2023 12:24:51 -0600 Subject: [PATCH 37/63] validated not caught up if there is legacy information --- go-ethereum | 2 +- staker/block_validator.go | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/go-ethereum b/go-ethereum index f214ae0426..7ea707239f 160000 --- a/go-ethereum +++ b/go-ethereum @@ -1 +1 @@ -Subproject commit f214ae0426fa0affbc29871a5277f4dc75afb0bb +Subproject commit 7ea707239f34fc2fa56be4b1b97a470f00cab600 diff --git a/staker/block_validator.go b/staker/block_validator.go index 5e980e12f3..01a0b91d06 100644 --- a/staker/block_validator.go +++ b/staker/block_validator.go @@ -951,6 +951,9 @@ func (v *BlockValidator) checkValidatedGSCaughtUp() (bool, error) { if v.chainCaughtUp { return true, nil } + if v.legacyValidInfo != nil { + return false, nil + } if v.lastValidGS.Batch == 0 { return false, errors.New("lastValid not initialized. 
cannot validate genesis") } From 2dc39a990b89acbeb507f4bd194ee8354ff58667 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Wed, 28 Jun 2023 14:22:38 -0600 Subject: [PATCH 38/63] fix merge errors --- arbnode/execution/node.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arbnode/execution/node.go b/arbnode/execution/node.go index 8b9cdd7538..874648db16 100644 --- a/arbnode/execution/node.go +++ b/arbnode/execution/node.go @@ -70,7 +70,7 @@ func CreateExecutionNode( LogCacheSize: rpcConfig.FilterLogCacheSize, Timeout: rpcConfig.FilterTimeout, } - backend, filterSystem, err := arbitrum.NewBackend(stack, &rpcConfig, chainDB, arbInterface, syncMonitor, filterConfig) + backend, filterSystem, err := arbitrum.NewBackend(stack, &rpcConfig, chainDB, arbInterface, filterConfig) if err != nil { return nil, err } From 37f9fb804394967d53d04d527ad0a39839a0d5df Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Wed, 28 Jun 2023 14:22:53 -0600 Subject: [PATCH 39/63] stop node even if start errored --- cmd/nitro/nitro.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cmd/nitro/nitro.go b/cmd/nitro/nitro.go index 0eeb6cfade..41a1e7cced 100644 --- a/cmd/nitro/nitro.go +++ b/cmd/nitro/nitro.go @@ -459,9 +459,8 @@ func mainImpl() int { err = currentNode.Start(ctx) if err != nil { fatalErrChan <- fmt.Errorf("error starting node: %w", err) - } else { - defer currentNode.StopAndWait() } + defer currentNode.StopAndWait() } sigint := make(chan os.Signal, 1) From 86ed5e20be65ee3d775b08819dee30318ed2f0d4 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Wed, 28 Jun 2023 14:29:40 -0600 Subject: [PATCH 40/63] update geth --- go-ethereum | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go-ethereum b/go-ethereum index 7ea707239f..174034e7ed 160000 --- a/go-ethereum +++ b/go-ethereum @@ -1 +1 @@ -Subproject commit 7ea707239f34fc2fa56be4b1b97a470f00cab600 +Subproject commit 174034e7eda7407463691cadada7cb83ebd291a1 From 60e9dfbc743d12df8cf30f2e81707112dec0ffd9 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Wed, 28 Jun 2023 20:40:00 -0600 Subject: [PATCH 41/63] geth: go back to origin/master --- go-ethereum | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go-ethereum b/go-ethereum index 174034e7ed..f7609eef4b 160000 --- a/go-ethereum +++ b/go-ethereum @@ -1 +1 @@ -Subproject commit 174034e7eda7407463691cadada7cb83ebd291a1 +Subproject commit f7609eef4bbd3a500292d12eed03956ffc5d6527 From 2887b77c8420707037c2bc2565b25fd36136fd72 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Wed, 28 Jun 2023 20:40:24 -0600 Subject: [PATCH 42/63] Revert "fix merge errors" This reverts commit 2dc39a990b89acbeb507f4bd194ee8354ff58667. 
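
Patch 39 above moves StopAndWait into an unconditional defer so that a node whose Start call fails is still torn down. A rough sketch of that shape, using a stand-in node type rather than the real arbnode.Node:

package main

import (
	"errors"
	"fmt"
)

type fakeNode struct{}

func (n *fakeNode) Start() error { return errors.New("boom") }
func (n *fakeNode) StopAndWait() { fmt.Println("node stopped") }

func run() {
	n := &fakeNode{}
	err := n.Start()
	if err != nil {
		fmt.Println("error starting node:", err)
	}
	// registered regardless of the Start error, mirroring the nitro.go change
	defer n.StopAndWait()
}

func main() {
	run() // prints the start error, then "node stopped"
}
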
--- arbnode/execution/node.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arbnode/execution/node.go b/arbnode/execution/node.go index 874648db16..8b9cdd7538 100644 --- a/arbnode/execution/node.go +++ b/arbnode/execution/node.go @@ -70,7 +70,7 @@ func CreateExecutionNode( LogCacheSize: rpcConfig.FilterLogCacheSize, Timeout: rpcConfig.FilterTimeout, } - backend, filterSystem, err := arbitrum.NewBackend(stack, &rpcConfig, chainDB, arbInterface, filterConfig) + backend, filterSystem, err := arbitrum.NewBackend(stack, &rpcConfig, chainDB, arbInterface, syncMonitor, filterConfig) if err != nil { return nil, err } From 4369b2772e5558b50070a1302720c8018fe99ba5 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Thu, 29 Jun 2023 10:11:32 -0600 Subject: [PATCH 43/63] l1_validator: add nil check --- staker/l1_validator.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/staker/l1_validator.go b/staker/l1_validator.go index 6c5d98ee19..7bdce64ba6 100644 --- a/staker/l1_validator.go +++ b/staker/l1_validator.go @@ -271,7 +271,7 @@ func (v *L1Validator) generateNodeAction(ctx context.Context, stakerInfo *OurSta var validatedGlobalState validator.GoGlobalState if v.blockValidator != nil { valInfo, err := v.blockValidator.ReadLastValidatedInfo() - if err != nil { + if err != nil || valInfo == nil { return nil, false, err } validatedGlobalState = valInfo.GlobalState From aa767ea82c1e5b434cfee4ea523654ea8fd10fca Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Thu, 29 Jun 2023 14:57:24 -0600 Subject: [PATCH 44/63] staker: regularly check latest staked --- arbnode/node.go | 2 +- staker/block_validator.go | 88 +++++++++++++++++++++++++++++++++++-- staker/staker.go | 46 ++++++++++++++++++- system_tests/staker_test.go | 3 ++ 4 files changed, 134 insertions(+), 5 deletions(-) diff --git a/arbnode/node.go b/arbnode/node.go index 033f26eadc..46968678f6 100644 --- a/arbnode/node.go +++ b/arbnode/node.go @@ -817,7 +817,7 @@ func createNodeImpl( } } - stakerObj, err = staker.NewStaker(l1Reader, wallet, bind.CallOpts{}, config.Staker, blockValidator, statelessBlockValidator, deployInfo.ValidatorUtils) + stakerObj, err = staker.NewStaker(l1Reader, wallet, bind.CallOpts{}, config.Staker, blockValidator, statelessBlockValidator, deployInfo.ValidatorUtils, fatalErrChan) if err != nil { return nil, err } diff --git a/staker/block_validator.go b/staker/block_validator.go index 01a0b91d06..517419cc86 100644 --- a/staker/block_validator.go +++ b/staker/block_validator.go @@ -774,20 +774,102 @@ func (v *BlockValidator) writeLastValidatedToDb(gs validator.GoGlobalState, wasm return nil } -func (v *BlockValidator) AssumeValid(globalState validator.GoGlobalState) error { +func (v *BlockValidator) validGSIsNew(globalState validator.GoGlobalState) bool { + if v.legacyValidInfo != nil { + if v.legacyValidInfo.AfterPosition.BatchNumber > globalState.Batch { + return false + } + if v.legacyValidInfo.AfterPosition.BatchNumber == globalState.Batch && v.legacyValidInfo.AfterPosition.PosInBatch >= globalState.PosInBatch { + return false + } + return true + } + if v.lastValidGS.Batch > globalState.Batch { + return false + } + if v.lastValidGS.Batch == globalState.Batch && v.lastValidGS.PosInBatch >= globalState.PosInBatch { + return false + } + return true +} + +// this accepts globalstate even if not caught up +func (v *BlockValidator) InitAssumeValid(globalState validator.GoGlobalState) error { if v.Started() { return fmt.Errorf("cannot handle AssumeValid while running") } // don't do anything 
if we already validated past that - if v.lastValidGS.Batch > globalState.Batch { + if !v.validGSIsNew(globalState) { return nil } - if v.lastValidGS.Batch == globalState.Batch && v.lastValidGS.PosInBatch > globalState.PosInBatch { + + v.legacyValidInfo = nil + v.lastValidGS = globalState + + err := v.writeLastValidatedToDb(v.lastValidGS, []common.Hash{}) + if err != nil { + log.Error("failed writing new validated to database", "pos", v.lastValidGS, "err", err) + } + + return nil +} + +func (v *BlockValidator) AssumeValid(count arbutil.MessageIndex, globalState validator.GoGlobalState) error { + + if count <= v.validated() { return nil } + v.reorgMutex.Lock() + defer v.reorgMutex.Unlock() + + if count <= v.validated() { + return nil + } + + if !v.chainCaughtUp { + if !v.validGSIsNew(globalState) { + return nil + } + v.legacyValidInfo = nil + v.lastValidGS = globalState + return nil + } + + countUint64 := uint64(count) + msg, err := v.streamer.GetMessage(count - 1) + if err != nil { + return err + } + // delete no-longer relevant entries + for iPos := v.validated(); iPos < count && iPos < v.created(); iPos++ { + status, found := v.validations.Load(iPos) + if found && status != nil && status.Cancel != nil { + status.Cancel() + } + v.validations.Delete(iPos) + } + if v.created() < count { + v.nextCreateStartGS = globalState + v.nextCreatePrevDelayed = msg.DelayedMessagesRead + v.nextCreateBatchReread = true + v.createdA = countUint64 + } + // under the reorg mutex we don't need atomic access + if v.recordSentA < countUint64 { + v.recordSentA = countUint64 + } + v.validatedA = countUint64 + v.valLoopPos = count + validatorMsgCountValidatedGauge.Update(int64(countUint64)) v.lastValidGS = globalState + err = v.writeLastValidatedToDb(v.lastValidGS, []common.Hash{}) // we don't know which wasm roots were validated + if err != nil { + log.Error("failed writing valid state after reorg", "err", err) + } + nonBlockingTrigger(v.createNodesChan) + return nil } diff --git a/staker/staker.go b/staker/staker.go index 79f41e824f..4ac85c2895 100644 --- a/staker/staker.go +++ b/staker/staker.go @@ -199,6 +199,7 @@ type Staker struct { bringActiveUntilNode uint64 inboxReader InboxReaderInterface statelessBlockValidator *StatelessBlockValidator + fatalErr chan<- error } func NewStaker( @@ -209,6 +210,7 @@ func NewStaker( blockValidator *BlockValidator, statelessBlockValidator *StatelessBlockValidator, validatorUtilsAddress common.Address, + fatalErr chan<- error, ) (*Staker, error) { if err := config.Validate(); err != nil { @@ -230,6 +232,7 @@ func NewStaker( lastActCalledBlock: nil, inboxReader: statelessBlockValidator.inboxReader, statelessBlockValidator: statelessBlockValidator, + fatalErr: fatalErr, }, nil } @@ -257,9 +260,43 @@ func (s *Staker) Initialize(ctx context.Context) error { return err } - return s.blockValidator.AssumeValid(stakedInfo.AfterState().GlobalState) + return s.blockValidator.InitAssumeValid(stakedInfo.AfterState().GlobalState) + } + return nil +} + +func (s *Staker) checkLatestStaked(ctx context.Context) error { + latestStaked, _, err := s.validatorUtils.LatestStaked(&s.baseCallOpts, s.rollupAddress, s.wallet.AddressOrZero()) + if err != nil { + return fmt.Errorf("couldn't get LatestStaked: %w", err) + } + stakerLatestStakedNodeGauge.Update(int64(latestStaked)) + if latestStaked == 0 { + return nil + } + + stakedInfo, err := s.rollup.LookupNode(ctx, latestStaked) + if err != nil { + return fmt.Errorf("couldn't look up latest node: %w", err) } + stakedGlobalState := 
stakedInfo.AfterState().GlobalState + caughtUp, count, err := GlobalStateToMsgCount(s.inboxTracker, s.txStreamer, stakedGlobalState) + if err != nil { + if errors.Is(err, ErrGlobalStateNotInChain) && s.fatalErr != nil { + fatal := fmt.Errorf("latest staked not in chain: %w", err) + s.fatalErr <- fatal + } + return fmt.Errorf("staker: latest staked %w", err) + } + + if !caughtUp { + log.Info("latest valid not yet in our node", "staked", stakedGlobalState) + return nil + } + if s.blockValidator != nil && s.config.StartFromStaked { + return s.blockValidator.AssumeValid(count, stakedGlobalState) + } return nil } @@ -317,6 +354,13 @@ func (s *Staker) Start(ctxIn context.Context) { } return backoff }) + s.CallIteratively(func(ctx context.Context) time.Duration { + err := s.checkLatestStaked(ctx) + if err != nil && ctx.Err() == nil { + log.Error("staker: error checking latest staked", "err", err) + } + return s.config.StakerInterval + }) } func (s *Staker) IsWhitelisted(ctx context.Context) (bool, error) { diff --git a/system_tests/staker_test.go b/system_tests/staker_test.go index 4e2091ac64..aa1ae0a64e 100644 --- a/system_tests/staker_test.go +++ b/system_tests/staker_test.go @@ -166,6 +166,7 @@ func stakerTestImpl(t *testing.T, faultyStaker bool, honestStakerInactive bool) nil, statelessA, l2nodeA.DeployInfo.ValidatorUtils, + nil, ) Require(t, err) err = stakerA.Initialize(ctx) @@ -199,6 +200,7 @@ func stakerTestImpl(t *testing.T, faultyStaker bool, honestStakerInactive bool) nil, statelessB, l2nodeB.DeployInfo.ValidatorUtils, + nil, ) Require(t, err) err = stakerB.Initialize(ctx) @@ -219,6 +221,7 @@ func stakerTestImpl(t *testing.T, faultyStaker bool, honestStakerInactive bool) nil, statelessA, l2nodeA.DeployInfo.ValidatorUtils, + nil, ) Require(t, err) if stakerC.Strategy() != staker.WatchtowerStrategy { From 63f7063664fe8760ca1691f78a6710ce5c9edd37 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Thu, 29 Jun 2023 17:25:48 -0600 Subject: [PATCH 45/63] batch_validator: fix messages when not caught up --- go-ethereum | 2 +- staker/block_validator.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/go-ethereum b/go-ethereum index 8e6a8ad494..f7609eef4b 160000 --- a/go-ethereum +++ b/go-ethereum @@ -1 +1 @@ -Subproject commit 8e6a8ad4942591011e833e6ebceca6bd668f3db0 +Subproject commit f7609eef4bbd3a500292d12eed03956ffc5d6527 diff --git a/staker/block_validator.go b/staker/block_validator.go index 517419cc86..28dc0e00a4 100644 --- a/staker/block_validator.go +++ b/staker/block_validator.go @@ -1049,7 +1049,7 @@ func (v *BlockValidator) checkValidatedGSCaughtUp() (bool, error) { log.Error("failed reading batch count", "err", err) batchCount = 0 } - batchMsgCount, err := v.inboxTracker.GetBatchMessageCount(batchCount) + batchMsgCount, err := v.inboxTracker.GetBatchMessageCount(batchCount - 1) if err != nil { log.Error("failed reading batchMsgCount", "err", err) batchMsgCount = 0 From 789f21f9994cbde80a781dc12c8a56571e98bb86 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Thu, 29 Jun 2023 19:35:44 -0600 Subject: [PATCH 46/63] validation: manage room in client and not server --- system_tests/validation_mock_test.go | 82 +++++++++++++++++++++++ validator/server_api/validation_client.go | 31 ++++++--- validator/server_arb/validator_spawner.go | 39 ++--------- validator/server_common/valrun.go | 16 ++--- validator/server_jit/spawner.go | 13 ++-- 5 files changed, 118 insertions(+), 63 deletions(-) diff --git a/system_tests/validation_mock_test.go 
b/system_tests/validation_mock_test.go index 8e9133af31..bfa2d67839 100644 --- a/system_tests/validation_mock_test.go +++ b/system_tests/validation_mock_test.go @@ -25,6 +25,7 @@ import ( type mockSpawner struct { ExecSpawned []uint64 + LaunchDelay time.Duration } var blockHashKey = common.HexToHash("0x11223344") @@ -55,6 +56,7 @@ func (s *mockSpawner) Launch(entry *validator.ValidationInput, moduleRoot common Promise: containers.NewPromise[validator.GoGlobalState](nil), root: moduleRoot, } + <-time.After(s.LaunchDelay) run.Produce(globalstateFromTestPreimages(entry.Preimages)) return run } @@ -237,6 +239,86 @@ func TestValidationServerAPI(t *testing.T) { } } +func TestValidationClientRoom(t *testing.T) { + t.Parallel() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + mockSpawner, spawnerStack := createMockValidationNode(t, ctx, nil) + client := server_api.NewExecutionClient(StaticFetcherFrom(t, &rpcclient.TestClientConfig), spawnerStack) + err := client.Start(ctx) + Require(t, err) + + wasmRoot, err := client.LatestWasmModuleRoot().Await(ctx) + Require(t, err) + + if client.Room() != 4 { + Fatal(t, "wrong initial room ", client.Room()) + } + + hash1 := common.HexToHash("0x11223344556677889900aabbccddeeff") + hash2 := common.HexToHash("0x11111111122222223333333444444444") + + startState := validator.GoGlobalState{ + BlockHash: hash1, + SendRoot: hash2, + Batch: 300, + PosInBatch: 3000, + } + endState := validator.GoGlobalState{ + BlockHash: hash2, + SendRoot: hash1, + Batch: 3000, + PosInBatch: 300, + } + + valInput := validator.ValidationInput{ + StartState: startState, + Preimages: globalstateToTestPreimages(endState), + } + + valRuns := make([]validator.ValidationRun, 0, 4) + + for i := 0; i < 4; i++ { + valRun := client.Launch(&valInput, wasmRoot) + valRuns = append(valRuns, valRun) + } + + for i := range valRuns { + _, err := valRuns[i].Await(ctx) + Require(t, err) + } + + if client.Room() != 4 { + Fatal(t, "wrong room after launch", client.Room()) + } + + mockSpawner.LaunchDelay = time.Hour + + valRuns = make([]validator.ValidationRun, 0, 3) + + for i := 0; i < 4; i++ { + valRun := client.Launch(&valInput, wasmRoot) + valRuns = append(valRuns, valRun) + room := client.Room() + if room != 3-i { + Fatal(t, "wrong room after launch ", room, " expected: ", 4-i) + } + } + + for i := range valRuns { + valRuns[i].Cancel() + _, err := valRuns[i].Await(ctx) + if err == nil { + Fatal(t, "no error returned after cancel i:", i) + } + } + + room := client.Room() + if room != 4 { + Fatal(t, "wrong room after canceling runs: ", room) + } +} + func TestExecutionKeepAlive(t *testing.T) { t.Parallel() ctx, cancel := context.WithCancel(context.Background()) diff --git a/validator/server_api/validation_client.go b/validator/server_api/validation_client.go index 4f678fde9e..d6143ca917 100644 --- a/validator/server_api/validation_client.go +++ b/validator/server_api/validation_client.go @@ -4,6 +4,7 @@ import ( "context" "encoding/base64" "errors" + "sync/atomic" "time" "github.com/offchainlabs/nitro/validator" @@ -23,6 +24,7 @@ type ValidationClient struct { stopwaiter.StopWaiter client *rpcclient.RpcClient name string + room int32 } func NewValidationClient(config rpcclient.ClientConfigFetcher, stack *node.Node) *ValidationClient { @@ -32,14 +34,15 @@ func NewValidationClient(config rpcclient.ClientConfigFetcher, stack *node.Node) } func (c *ValidationClient) Launch(entry *validator.ValidationInput, moduleRoot common.Hash) validator.ValidationRun { - valrun := 
server_common.NewValRun(moduleRoot) - c.LaunchThread(func(ctx context.Context) { + atomic.AddInt32(&c.room, -1) + promise := stopwaiter.LaunchPromiseThread[validator.GoGlobalState](c, func(ctx context.Context) (validator.GoGlobalState, error) { input := ValidationInputToJson(entry) var res validator.GoGlobalState err := c.client.CallContext(ctx, &res, Namespace+"_validate", input, moduleRoot) - valrun.ConsumeResult(res, err) + atomic.AddInt32(&c.room, 1) + return res, err }) - return valrun + return server_common.NewValRun(promise, moduleRoot) } func (c *ValidationClient) Start(ctx_in context.Context) error { @@ -57,6 +60,18 @@ func (c *ValidationClient) Start(ctx_in context.Context) error { if len(name) == 0 { return errors.New("couldn't read name from server") } + var room int + err = c.client.CallContext(c.GetContext(), &room, Namespace+"_room") + if err != nil { + return err + } + if room < 2 { + log.Warn("validation server not enough room, overriding to 2", "name", name, "room", room) + room = 2 + } else { + log.Info("connected to validation server", "name", name, "room", room) + } + atomic.StoreInt32(&c.room, int32(room)) c.name = name return nil } @@ -76,13 +91,11 @@ func (c *ValidationClient) Name() string { } func (c *ValidationClient) Room() int { - var res int - err := c.client.CallContext(c.GetContext(), &res, Namespace+"_room") - if err != nil { - log.Error("error contacting validation server", "name", c.name, "err", err) + room32 := atomic.LoadInt32(&c.room) + if room32 < 0 { return 0 } - return res + return int(room32) } type ExecutionClient struct { diff --git a/validator/server_arb/validator_spawner.go b/validator/server_arb/validator_spawner.go index a073d24c3c..f9d0705f59 100644 --- a/validator/server_arb/validator_spawner.go +++ b/validator/server_arb/validator_spawner.go @@ -57,32 +57,6 @@ type ArbitratorSpawner struct { config ArbitratorSpawnerConfigFecher } -type valRun struct { - containers.Promise[validator.GoGlobalState] - root common.Hash -} - -func (r *valRun) WasmModuleRoot() common.Hash { - return r.root -} - -func (r *valRun) Close() {} - -func NewvalRun(root common.Hash) *valRun { - return &valRun{ - Promise: containers.NewPromise[validator.GoGlobalState](nil), - root: root, - } -} - -func (r *valRun) consumeResult(res validator.GoGlobalState, err error) { - if err != nil { - r.ProduceError(err) - } else { - r.Produce(res) - } -} - func NewArbitratorSpawner(locator *server_common.MachineLocator, config ArbitratorSpawnerConfigFecher) (*ArbitratorSpawner, error) { // TODO: preload machines spawner := &ArbitratorSpawner{ @@ -180,12 +154,11 @@ func (v *ArbitratorSpawner) execute( func (v *ArbitratorSpawner) Launch(entry *validator.ValidationInput, moduleRoot common.Hash) validator.ValidationRun { atomic.AddInt32(&v.count, 1) - run := NewvalRun(moduleRoot) - v.LaunchThread(func(ctx context.Context) { + promise := stopwaiter.LaunchPromiseThread[validator.GoGlobalState](v, func(ctx context.Context) (validator.GoGlobalState, error) { defer atomic.AddInt32(&v.count, -1) - run.consumeResult(v.execute(ctx, entry, moduleRoot)) + return v.execute(ctx, entry, moduleRoot) }) - return run + return server_common.NewValRun(promise, moduleRoot) } func (v *ArbitratorSpawner) Room() int { @@ -193,11 +166,7 @@ func (v *ArbitratorSpawner) Room() int { if avail == 0 { avail = runtime.NumCPU() } - current := int(atomic.LoadInt32(&v.count)) - if current >= avail { - return 0 - } - return avail - current + return avail } var launchTime = time.Now().Format("2006_01_02__15_04") diff 
--git a/validator/server_common/valrun.go b/validator/server_common/valrun.go index 1331c29852..8486664008 100644 --- a/validator/server_common/valrun.go +++ b/validator/server_common/valrun.go @@ -7,7 +7,7 @@ import ( ) type ValRun struct { - containers.Promise[validator.GoGlobalState] + containers.PromiseInterface[validator.GoGlobalState] root common.Hash } @@ -15,17 +15,9 @@ func (r *ValRun) WasmModuleRoot() common.Hash { return r.root } -func NewValRun(root common.Hash) *ValRun { +func NewValRun(promise containers.PromiseInterface[validator.GoGlobalState], root common.Hash) *ValRun { return &ValRun{ - Promise: containers.NewPromise[validator.GoGlobalState](nil), - root: root, - } -} - -func (r *ValRun) ConsumeResult(res validator.GoGlobalState, err error) { - if err != nil { - r.ProduceError(err) - } else { - r.Produce(res) + PromiseInterface: promise, + root: root, } } diff --git a/validator/server_jit/spawner.go b/validator/server_jit/spawner.go index 7a3394bcae..6de006b182 100644 --- a/validator/server_jit/spawner.go +++ b/validator/server_jit/spawner.go @@ -90,12 +90,11 @@ func (s *JitSpawner) Name() string { func (v *JitSpawner) Launch(entry *validator.ValidationInput, moduleRoot common.Hash) validator.ValidationRun { atomic.AddInt32(&v.count, 1) - run := server_common.NewValRun(moduleRoot) - go func() { - run.ConsumeResult(v.execute(v.GetContext(), entry, moduleRoot)) - atomic.AddInt32(&v.count, -1) - }() - return run + promise := stopwaiter.LaunchPromiseThread[validator.GoGlobalState](v, func(ctx context.Context) (validator.GoGlobalState, error) { + defer atomic.AddInt32(&v.count, -1) + return v.execute(ctx, entry, moduleRoot) + }) + return server_common.NewValRun(promise, moduleRoot) } func (v *JitSpawner) Room() int { @@ -103,7 +102,7 @@ func (v *JitSpawner) Room() int { if avail == 0 { avail = runtime.NumCPU() } - return avail - int(atomic.LoadInt32(&v.count)) + return avail } func (v *JitSpawner) Stop() { From ebbc572d8543603f51c0ecff7a6e859730cb78d6 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Thu, 29 Jun 2023 19:54:03 -0600 Subject: [PATCH 47/63] block_validator: call launch in main thread --- staker/block_validator.go | 43 ++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/staker/block_validator.go b/staker/block_validator.go index 28dc0e00a4..7ec395fb10 100644 --- a/staker/block_validator.go +++ b/staker/block_validator.go @@ -699,38 +699,39 @@ validationsLoop: return nil, nil } if currentStatus == Prepared { + input, err := validationStatus.Entry.ToInput() + if err != nil && ctx.Err() == nil { + v.possiblyFatal(fmt.Errorf("%w: error preparing validation", err)) + continue + } replaced := validationStatus.replaceStatus(Prepared, SendingValidation) if !replaced { v.possiblyFatal(errors.New("failed to set SendingValidation status")) } - v.LaunchThread(func(ctx context.Context) { - validationCtx, cancel := context.WithCancel(ctx) - defer cancel() - validationStatus.Cancel = cancel - input, err := validationStatus.Entry.ToInput() - if err != nil && validationCtx.Err() == nil { - v.possiblyFatal(fmt.Errorf("%w: error preparing validation", err)) - return - } - validatorPendingValidationsGauge.Inc(1) - defer validatorPendingValidationsGauge.Dec(1) - var runs []validator.ValidationRun - for _, moduleRoot := range wasmRoots { - for i, spawner := range v.validationSpawners { - run := spawner.Launch(input, moduleRoot) - log.Trace("advanceValidations: launched", "pos", validationStatus.Entry.Pos, "moduleRoot", moduleRoot, 
"spawner", i) - runs = append(runs, run) - } + validatorPendingValidationsGauge.Inc(1) + defer validatorPendingValidationsGauge.Dec(1) + var runs []validator.ValidationRun + for _, moduleRoot := range wasmRoots { + for i, spawner := range v.validationSpawners { + run := spawner.Launch(input, moduleRoot) + log.Trace("advanceValidations: launched", "pos", validationStatus.Entry.Pos, "moduleRoot", moduleRoot, "spawner", i) + runs = append(runs, run) } - validationStatus.Runs = runs - replaced := validationStatus.replaceStatus(SendingValidation, ValidationSent) + } + validationCtx, cancel := context.WithCancel(ctx) + validationStatus.Runs = runs + validationStatus.Cancel = cancel + v.LaunchUntrackedThread(func() { + defer cancel() + replaced = validationStatus.replaceStatus(SendingValidation, ValidationSent) if !replaced { v.possiblyFatal(errors.New("failed to set status to ValidationSent")) } + // validationStatus might be removed from under us // trigger validation progress when done for _, run := range runs { - _, err := run.Await(ctx) + _, err := run.Await(validationCtx) if err != nil { return } From e96d7796055b39b8cf62490bfccfd73833047466 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Fri, 30 Jun 2023 12:06:42 -0600 Subject: [PATCH 48/63] pruner: deleteFromRange return uint64 Also solving a bug that caused to return identical first and last --- arbnode/message_pruner.go | 2 +- arbnode/transaction_streamer.go | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/arbnode/message_pruner.go b/arbnode/message_pruner.go index 1ba3886d8d..81b85daf77 100644 --- a/arbnode/message_pruner.go +++ b/arbnode/message_pruner.go @@ -117,7 +117,7 @@ func deleteOldMessageFromDB(endBatchCount uint64, endBatchMetadata BatchMetadata } } -func deleteFromLastPrunedUptoEndKey(db ethdb.Database, prefix []byte, endMinKey uint64) ([][]byte, error) { +func deleteFromLastPrunedUptoEndKey(db ethdb.Database, prefix []byte, endMinKey uint64) ([]uint64, error) { startIter := db.NewIterator(prefix, uint64ToKey(1)) if !startIter.Next() { return nil, nil diff --git a/arbnode/transaction_streamer.go b/arbnode/transaction_streamer.go index 9e56f555f2..2d371f4788 100644 --- a/arbnode/transaction_streamer.go +++ b/arbnode/transaction_streamer.go @@ -206,19 +206,20 @@ func deleteStartingAt(db ethdb.Database, batch ethdb.Batch, prefix []byte, minKe } // deleteFromRange deletes key ranging from startMinKey(inclusive) to endMinKey(exclusive) -func deleteFromRange(db ethdb.Database, prefix []byte, startMinKey uint64, endMinKey uint64) ([][]byte, error) { +func deleteFromRange(db ethdb.Database, prefix []byte, startMinKey uint64, endMinKey uint64) ([]uint64, error) { batch := db.NewBatch() startIter := db.NewIterator(prefix, uint64ToKey(startMinKey)) defer startIter.Release() - var prunedKeysRange [][]byte + var prunedKeysRange []uint64 for startIter.Next() { - if binary.BigEndian.Uint64(bytes.TrimPrefix(startIter.Key(), prefix)) >= endMinKey { + currentKey := binary.BigEndian.Uint64(bytes.TrimPrefix(startIter.Key(), prefix)) + if currentKey >= endMinKey { break } if len(prunedKeysRange) == 0 || len(prunedKeysRange) == 1 { - prunedKeysRange = append(prunedKeysRange, startIter.Key()) + prunedKeysRange = append(prunedKeysRange, currentKey) } else { - prunedKeysRange[1] = startIter.Key() + prunedKeysRange[1] = currentKey } err := batch.Delete(startIter.Key()) if err != nil { From 18e836585862944e35376b0e82bd2847e52a798c Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Fri, 30 Jun 2023 16:05:52 -0600 
Subject: [PATCH 49/63] message pruner updates call from staker don't prune batches required for reports --- arbnode/message_pruner.go | 170 ++++++++++++++++++++++++-------- arbnode/message_pruner_test.go | 30 ++++-- arbnode/node.go | 14 ++- arbnode/transaction_streamer.go | 6 +- staker/block_validator.go | 15 ++- staker/staker.go | 25 ++++- system_tests/staker_test.go | 3 + 7 files changed, 201 insertions(+), 62 deletions(-) diff --git a/arbnode/message_pruner.go b/arbnode/message_pruner.go index 81b85daf77..07ca12e3f2 100644 --- a/arbnode/message_pruner.go +++ b/arbnode/message_pruner.go @@ -7,16 +7,19 @@ import ( "bytes" "context" "encoding/binary" - "math/big" + "errors" + "fmt" + "sync" "time" - "github.com/ethereum/go-ethereum/accounts/abi/bind" + "github.com/ethereum/go-ethereum/common/math" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" - "github.com/ethereum/go-ethereum/rpc" - "github.com/offchainlabs/nitro/staker" + "github.com/offchainlabs/nitro/arbos/arbostypes" + "github.com/offchainlabs/nitro/arbutil" "github.com/offchainlabs/nitro/util/stopwaiter" + "github.com/offchainlabs/nitro/validator" flag "github.com/spf13/pflag" ) @@ -25,99 +28,184 @@ type MessagePruner struct { stopwaiter.StopWaiter transactionStreamer *TransactionStreamer inboxTracker *InboxTracker - staker *staker.Staker config MessagePrunerConfigFetcher + pruningLock sync.Mutex + lastPruneDone time.Time } type MessagePrunerConfig struct { - Enable bool `koanf:"enable"` - MessagePruneInterval time.Duration `koanf:"prune-interval" reload:"hot"` + Enable bool `koanf:"enable"` + MessagePruneInterval time.Duration `koanf:"prune-interval" reload:"hot"` + SearchBatchReportLimit int64 `koanf:"search-batch-report" reload:"hot"` } type MessagePrunerConfigFetcher func() *MessagePrunerConfig var DefaultMessagePrunerConfig = MessagePrunerConfig{ - Enable: true, - MessagePruneInterval: time.Minute, + Enable: true, + MessagePruneInterval: time.Minute, + SearchBatchReportLimit: 100000, } func MessagePrunerConfigAddOptions(prefix string, f *flag.FlagSet) { f.Bool(prefix+".enable", DefaultMessagePrunerConfig.Enable, "enable message pruning") f.Duration(prefix+".prune-interval", DefaultMessagePrunerConfig.MessagePruneInterval, "interval for running message pruner") + f.Int64(prefix+"search-batch-report", DefaultMessagePrunerConfig.SearchBatchReportLimit, "limit for searching for a batch report when pruning (negative disables)") } -func NewMessagePruner(transactionStreamer *TransactionStreamer, inboxTracker *InboxTracker, staker *staker.Staker, config MessagePrunerConfigFetcher) *MessagePruner { +func NewMessagePruner(transactionStreamer *TransactionStreamer, inboxTracker *InboxTracker, config MessagePrunerConfigFetcher) *MessagePruner { return &MessagePruner{ transactionStreamer: transactionStreamer, inboxTracker: inboxTracker, - staker: staker, config: config, } } func (m *MessagePruner) Start(ctxIn context.Context) { m.StopWaiter.Start(ctxIn, m) - m.CallIteratively(m.prune) } -func (m *MessagePruner) prune(ctx context.Context) time.Duration { - latestConfirmedNode, err := m.staker.Rollup().LatestConfirmed( - &bind.CallOpts{ - Context: ctx, - BlockNumber: big.NewInt(int64(rpc.FinalizedBlockNumber)), - }) +func (m *MessagePruner) UpdateLatestStaked(count arbutil.MessageIndex, globalState validator.GoGlobalState) { + locked := m.pruningLock.TryLock() + if !locked { + return + } + + if m.lastPruneDone.Add(m.config().MessagePruneInterval).After(time.Now()) { + m.pruningLock.Unlock() + return + } + err := 
m.LaunchThreadSafe(func(ctx context.Context) { + defer m.pruningLock.Unlock() + err := m.prune(ctx, count, globalState) + if err != nil { + log.Error("error while pruning", "err", err) + } + }) if err != nil { - log.Error("error getting latest confirmed node", "err", err) - return m.config().MessagePruneInterval + log.Info("failed launching prune thread", "err", err) + m.pruningLock.Unlock() } - nodeInfo, err := m.staker.Rollup().LookupNode(ctx, latestConfirmedNode) +} + +// looks for batch posting report starting from delayed message delayedMsgStart +// returns number of batch for which report was found (meaning - it should not be pruned) +// if not found - returns maxUint64 (no limit on pruning) +func (m *MessagePruner) findBatchReport(ctx context.Context, delayedMsgStart uint64) (uint64, error) { + searchLimit := m.config().SearchBatchReportLimit + if searchLimit < 0 { + return math.MaxUint64, nil + } + delayedCount, err := m.inboxTracker.GetDelayedCount() if err != nil { - log.Error("error getting latest confirmed node info", "node", latestConfirmedNode, "err", err) - return m.config().MessagePruneInterval + return 0, err + } + if delayedCount <= delayedMsgStart { + return 0, errors.New("delayedCount behind pruning target") + } + searchUpTil := delayedCount + searchUpLimit := delayedMsgStart + uint64(searchLimit) + if searchLimit > 0 && searchUpLimit < searchUpTil { + searchUpTil = searchUpLimit + } + for delayed := delayedMsgStart; delayed < searchUpTil; delayed++ { + if ctx.Err() != nil { + return 0, ctx.Err() + } + msg, err := m.inboxTracker.GetDelayedMessage(delayed) + if err != nil { + return 0, err + } + if msg.Header.Kind == arbostypes.L1MessageType_BatchPostingReport { + _, _, _, batchNum, _, _, err := arbostypes.ParseBatchPostingReportMessageFields(bytes.NewReader(msg.L2msg)) + if err != nil { + return 0, fmt.Errorf("trying to parse batch-posting report: %w", err) + } + return batchNum, nil + } } - endBatchCount := nodeInfo.Assertion.AfterState.GlobalState.Batch + searchDownLimit := uint64(0) + if searchLimit > 0 { + searchedUp := searchUpTil - delayedMsgStart + limitRemaining := uint64(searchLimit) - searchedUp + if limitRemaining < delayedMsgStart { + searchDownLimit = delayedMsgStart - limitRemaining + } + } + for delayed := delayedMsgStart - 1; delayed >= searchDownLimit; delayed-- { + if ctx.Err() != nil { + return 0, ctx.Err() + } + msg, err := m.inboxTracker.GetDelayedMessage(delayed) + if errors.Is(err, AccumulatorNotFoundErr) { + // older delayed probably pruned - assume we won't find a report + return math.MaxUint64, nil + } + if err != nil { + return 0, err + } + if msg.Header.Kind == arbostypes.L1MessageType_BatchPostingReport { + _, _, _, batchNum, _, _, err := arbostypes.ParseBatchPostingReportMessageFields(bytes.NewReader(msg.L2msg)) + if err != nil { + return 0, fmt.Errorf("trying to parse batch-posting report: %w", err) + } + // found below delayedMessage - so batchnum can be pruned but above it cannot + return batchNum + 1, nil + } + } + return math.MaxUint64, nil +} + +func (m *MessagePruner) prune(ctx context.Context, count arbutil.MessageIndex, globalState validator.GoGlobalState) error { + endBatchCount := globalState.Batch if endBatchCount == 0 { - return m.config().MessagePruneInterval + return nil } endBatchMetadata, err := m.inboxTracker.GetBatchMetadata(endBatchCount - 1) if err != nil { - log.Error("error getting last batch metadata", "batch", endBatchCount-1, "err", err) - return m.config().MessagePruneInterval + return err } - 
deleteOldMessageFromDB(endBatchCount, endBatchMetadata, m.inboxTracker.db, m.transactionStreamer.db) - return m.config().MessagePruneInterval + msgCount := endBatchMetadata.MessageCount + delayedCount := endBatchMetadata.DelayedMessageCount + + batchPruneLimit, err := m.findBatchReport(ctx, delayedCount) + if err != nil { + return fmt.Errorf("failed finding batch report: %w", err) + } + if batchPruneLimit < endBatchCount { + endBatchCount = batchPruneLimit + } + return deleteOldMessageFromDB(ctx, endBatchCount, msgCount, delayedCount, m.inboxTracker.db, m.transactionStreamer.db) } -func deleteOldMessageFromDB(endBatchCount uint64, endBatchMetadata BatchMetadata, inboxTrackerDb ethdb.Database, transactionStreamerDb ethdb.Database) { - prunedKeysRange, err := deleteFromLastPrunedUptoEndKey(inboxTrackerDb, sequencerBatchMetaPrefix, endBatchCount) +func deleteOldMessageFromDB(ctx context.Context, endBatchCount uint64, messageCount arbutil.MessageIndex, delayedMessageCount uint64, inboxTrackerDb ethdb.Database, transactionStreamerDb ethdb.Database) error { + prunedKeysRange, err := deleteFromLastPrunedUptoEndKey(ctx, inboxTrackerDb, sequencerBatchMetaPrefix, endBatchCount) if err != nil { - log.Error("error deleting batch metadata", "err", err) - return + return fmt.Errorf("error deleting batch metadata: %w", err) } if len(prunedKeysRange) > 0 { log.Info("Pruned batches:", "first pruned key", prunedKeysRange[0], "last pruned key", prunedKeysRange[len(prunedKeysRange)-1]) } - prunedKeysRange, err = deleteFromLastPrunedUptoEndKey(transactionStreamerDb, messagePrefix, uint64(endBatchMetadata.MessageCount)) + prunedKeysRange, err = deleteFromLastPrunedUptoEndKey(ctx, transactionStreamerDb, messagePrefix, uint64(messageCount)) if err != nil { - log.Error("error deleting last batch messages", "err", err) - return + return fmt.Errorf("error deleting last batch messages: %w", err) } if len(prunedKeysRange) > 0 { log.Info("Pruned last batch messages:", "first pruned key", prunedKeysRange[0], "last pruned key", prunedKeysRange[len(prunedKeysRange)-1]) } - prunedKeysRange, err = deleteFromLastPrunedUptoEndKey(inboxTrackerDb, rlpDelayedMessagePrefix, endBatchMetadata.DelayedMessageCount) + prunedKeysRange, err = deleteFromLastPrunedUptoEndKey(ctx, inboxTrackerDb, rlpDelayedMessagePrefix, delayedMessageCount) if err != nil { - log.Error("error deleting last batch delayed messages", "err", err) - return + return fmt.Errorf("error deleting last batch delayed messages: %w", err) } if len(prunedKeysRange) > 0 { log.Info("Pruned last batch delayed messages:", "first pruned key", prunedKeysRange[0], "last pruned key", prunedKeysRange[len(prunedKeysRange)-1]) } + return nil } -func deleteFromLastPrunedUptoEndKey(db ethdb.Database, prefix []byte, endMinKey uint64) ([]uint64, error) { +func deleteFromLastPrunedUptoEndKey(ctx context.Context, db ethdb.Database, prefix []byte, endMinKey uint64) ([]uint64, error) { startIter := db.NewIterator(prefix, uint64ToKey(1)) if !startIter.Next() { return nil, nil @@ -125,7 +213,7 @@ func deleteFromLastPrunedUptoEndKey(db ethdb.Database, prefix []byte, endMinKey startMinKey := binary.BigEndian.Uint64(bytes.TrimPrefix(startIter.Key(), prefix)) startIter.Release() if endMinKey > startMinKey { - return deleteFromRange(db, prefix, startMinKey, endMinKey-1) + return deleteFromRange(ctx, db, prefix, startMinKey, endMinKey-1) } return nil, nil } diff --git a/arbnode/message_pruner_test.go b/arbnode/message_pruner_test.go index 16c1d6b71c..4c09aa8c6b 100644 --- 
a/arbnode/message_pruner_test.go +++ b/arbnode/message_pruner_test.go @@ -4,6 +4,7 @@ package arbnode import ( + "context" "testing" "github.com/ethereum/go-ethereum/core/rawdb" @@ -11,13 +12,17 @@ import ( ) func TestMessagePrunerWithPruningEligibleMessagePresent(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + endBatchCount := uint64(2 * 100 * 1024) endBatchMetadata := BatchMetadata{ MessageCount: 2 * 100 * 1024, DelayedMessageCount: 2 * 100 * 1024, } inboxTrackerDb, transactionStreamerDb := setupDatabase(t, endBatchCount, endBatchMetadata) - deleteOldMessageFromDB(endBatchCount, endBatchMetadata, inboxTrackerDb, transactionStreamerDb) + err := deleteOldMessageFromDB(ctx, endBatchCount, endBatchMetadata.MessageCount, endBatchMetadata.DelayedMessageCount, inboxTrackerDb, transactionStreamerDb) + Require(t, err) checkDbKeys(t, endBatchCount, inboxTrackerDb, sequencerBatchMetaPrefix) checkDbKeys(t, uint64(endBatchMetadata.MessageCount), transactionStreamerDb, messagePrefix) @@ -26,29 +31,38 @@ func TestMessagePrunerWithPruningEligibleMessagePresent(t *testing.T) { } func TestMessagePrunerTraverseEachMessageOnlyOnce(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + endBatchCount := uint64(10) endBatchMetadata := BatchMetadata{} inboxTrackerDb, transactionStreamerDb := setupDatabase(t, endBatchCount, endBatchMetadata) // In first iteration message till endBatchCount are tried to be deleted. - deleteOldMessageFromDB(endBatchCount, endBatchMetadata, inboxTrackerDb, transactionStreamerDb) + err := deleteOldMessageFromDB(ctx, endBatchCount, endBatchMetadata.MessageCount, endBatchMetadata.DelayedMessageCount, inboxTrackerDb, transactionStreamerDb) + Require(t, err) // In first iteration all the message till endBatchCount are deleted. checkDbKeys(t, endBatchCount, inboxTrackerDb, sequencerBatchMetaPrefix) // After first iteration endBatchCount/2 is reinserted in inbox db - err := inboxTrackerDb.Put(dbKey(sequencerBatchMetaPrefix, endBatchCount/2), []byte{}) + err = inboxTrackerDb.Put(dbKey(sequencerBatchMetaPrefix, endBatchCount/2), []byte{}) Require(t, err) // In second iteration message till endBatchCount are again tried to be deleted. - deleteOldMessageFromDB(endBatchCount, endBatchMetadata, inboxTrackerDb, transactionStreamerDb) + err = deleteOldMessageFromDB(ctx, endBatchCount, endBatchMetadata.MessageCount, endBatchMetadata.DelayedMessageCount, inboxTrackerDb, transactionStreamerDb) + Require(t, err) // In second iteration all the message till endBatchCount are deleted again. 
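// Illustrative sketch, condensed from deleteFromRange/deleteFromLastPrunedUptoEndKey as they
// appear in this series (the helper name pruneKeysBelow is hypothetical; context checks and
// logging are omitted). It shows the pattern these tests exercise: keys are a table prefix
// followed by an 8-byte big-endian index, everything strictly below endExclusive is deleted,
// and only the first and last pruned indices are tracked. Assumes the imports already used by
// message_pruner.go ("bytes", "encoding/binary", go-ethereum's ethdb).
//
//	func pruneKeysBelow(db ethdb.Database, prefix []byte, endExclusive uint64) ([]uint64, error) {
//		it := db.NewIterator(prefix, nil)
//		defer it.Release()
//		batch := db.NewBatch()
//		var pruned []uint64 // holds at most {first, last}
//		for it.Next() {
//			key := binary.BigEndian.Uint64(bytes.TrimPrefix(it.Key(), prefix))
//			if key >= endExclusive {
//				break
//			}
//			if len(pruned) < 2 {
//				pruned = append(pruned, key)
//			} else {
//				pruned[1] = key // keep only the first and the most recent (last) pruned index
//			}
//			if err := batch.Delete(it.Key()); err != nil {
//				return nil, err
//			}
//		}
//		if err := it.Error(); err != nil {
//			return nil, err
//		}
//		return pruned, batch.Write()
//	}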
checkDbKeys(t, endBatchCount, inboxTrackerDb, sequencerBatchMetaPrefix) } func TestMessagePrunerPruneTillLessThenEqualTo(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + endBatchCount := uint64(10) endBatchMetadata := BatchMetadata{} inboxTrackerDb, transactionStreamerDb := setupDatabase(t, 2*endBatchCount, endBatchMetadata) err := inboxTrackerDb.Delete(dbKey(sequencerBatchMetaPrefix, 9)) Require(t, err) - deleteOldMessageFromDB(endBatchCount, endBatchMetadata, inboxTrackerDb, transactionStreamerDb) + err = deleteOldMessageFromDB(ctx, endBatchCount, endBatchMetadata.MessageCount, endBatchMetadata.DelayedMessageCount, inboxTrackerDb, transactionStreamerDb) + Require(t, err) hasKey, err := inboxTrackerDb.Has(dbKey(sequencerBatchMetaPrefix, 10)) Require(t, err) if !hasKey { @@ -57,13 +71,17 @@ func TestMessagePrunerPruneTillLessThenEqualTo(t *testing.T) { } func TestMessagePrunerWithNoPruningEligibleMessagePresent(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + endBatchCount := uint64(2) endBatchMetadata := BatchMetadata{ MessageCount: 2, DelayedMessageCount: 2, } inboxTrackerDb, transactionStreamerDb := setupDatabase(t, endBatchCount, endBatchMetadata) - deleteOldMessageFromDB(endBatchCount, endBatchMetadata, inboxTrackerDb, transactionStreamerDb) + err := deleteOldMessageFromDB(ctx, endBatchCount, endBatchMetadata.MessageCount, endBatchMetadata.DelayedMessageCount, inboxTrackerDb, transactionStreamerDb) + Require(t, err) checkDbKeys(t, endBatchCount, inboxTrackerDb, sequencerBatchMetaPrefix) checkDbKeys(t, uint64(endBatchMetadata.MessageCount), transactionStreamerDb, messagePrefix) diff --git a/arbnode/node.go b/arbnode/node.go index 46968678f6..c559eb6298 100644 --- a/arbnode/node.go +++ b/arbnode/node.go @@ -791,6 +791,8 @@ func createNodeImpl( } var stakerObj *staker.Staker + var messagePruner *MessagePruner + if config.Staker.Enable { var wallet staker.ValidatorWalletInterface if config.Staker.UseSmartContractWallet || txOptsValidator == nil { @@ -817,7 +819,13 @@ func createNodeImpl( } } - stakerObj, err = staker.NewStaker(l1Reader, wallet, bind.CallOpts{}, config.Staker, blockValidator, statelessBlockValidator, deployInfo.ValidatorUtils, fatalErrChan) + notifiers := make([]staker.LatestStakedNotifier, 0) + if config.MessagePruner.Enable && !config.Caching.Archive { + messagePruner = NewMessagePruner(txStreamer, inboxTracker, func() *MessagePrunerConfig { return &configFetcher.Get().MessagePruner }) + notifiers = append(notifiers, messagePruner) + } + + stakerObj, err = staker.NewStaker(l1Reader, wallet, bind.CallOpts{}, config.Staker, blockValidator, statelessBlockValidator, notifiers, deployInfo.ValidatorUtils, fatalErrChan) if err != nil { return nil, err } @@ -849,10 +857,6 @@ func createNodeImpl( return nil, err } } - var messagePruner *MessagePruner - if config.MessagePruner.Enable && !config.Caching.Archive && stakerObj != nil { - messagePruner = NewMessagePruner(txStreamer, inboxTracker, stakerObj, func() *MessagePrunerConfig { return &configFetcher.Get().MessagePruner }) - } // always create DelayedSequencer, it won't do anything if it is disabled delayedSequencer, err = NewDelayedSequencer(l1Reader, inboxReader, exec.ExecEngine, coordinator, func() *DelayedSequencerConfig { return &configFetcher.Get().DelayedSequencer }) if err != nil { diff --git a/arbnode/transaction_streamer.go b/arbnode/transaction_streamer.go index 2d371f4788..7752c69e8e 100644 --- a/arbnode/transaction_streamer.go 
+++ b/arbnode/transaction_streamer.go @@ -206,12 +206,16 @@ func deleteStartingAt(db ethdb.Database, batch ethdb.Batch, prefix []byte, minKe } // deleteFromRange deletes key ranging from startMinKey(inclusive) to endMinKey(exclusive) -func deleteFromRange(db ethdb.Database, prefix []byte, startMinKey uint64, endMinKey uint64) ([]uint64, error) { +// might have deleted some keys even if returning an error +func deleteFromRange(ctx context.Context, db ethdb.Database, prefix []byte, startMinKey uint64, endMinKey uint64) ([]uint64, error) { batch := db.NewBatch() startIter := db.NewIterator(prefix, uint64ToKey(startMinKey)) defer startIter.Release() var prunedKeysRange []uint64 for startIter.Next() { + if ctx.Err() != nil { + return nil, ctx.Err() + } currentKey := binary.BigEndian.Uint64(bytes.TrimPrefix(startIter.Key(), prefix)) if currentKey >= endMinKey { break diff --git a/staker/block_validator.go b/staker/block_validator.go index 7ec395fb10..f2671ebe44 100644 --- a/staker/block_validator.go +++ b/staker/block_validator.go @@ -816,32 +816,33 @@ func (v *BlockValidator) InitAssumeValid(globalState validator.GoGlobalState) er return nil } -func (v *BlockValidator) AssumeValid(count arbutil.MessageIndex, globalState validator.GoGlobalState) error { +func (v *BlockValidator) UpdateLatestStaked(count arbutil.MessageIndex, globalState validator.GoGlobalState) { if count <= v.validated() { - return nil + return } v.reorgMutex.Lock() defer v.reorgMutex.Unlock() if count <= v.validated() { - return nil + return } if !v.chainCaughtUp { if !v.validGSIsNew(globalState) { - return nil + return } v.legacyValidInfo = nil v.lastValidGS = globalState - return nil + return } countUint64 := uint64(count) msg, err := v.streamer.GetMessage(count - 1) if err != nil { - return err + log.Error("getMessage error", "err", err, "count", count) + return } // delete no-longer relevant entries for iPos := v.validated(); iPos < count && iPos < v.created(); iPos++ { @@ -870,8 +871,6 @@ func (v *BlockValidator) AssumeValid(count arbutil.MessageIndex, globalState val log.Error("failed writing valid state after reorg", "err", err) } nonBlockingTrigger(v.createNodesChan) - - return nil } // Because batches and blocks are handled at separate layers in the node, diff --git a/staker/staker.go b/staker/staker.go index 4ac85c2895..f48290af36 100644 --- a/staker/staker.go +++ b/staker/staker.go @@ -19,9 +19,11 @@ import ( "github.com/ethereum/go-ethereum/metrics" flag "github.com/spf13/pflag" + "github.com/offchainlabs/nitro/arbutil" "github.com/offchainlabs/nitro/cmd/genericconf" "github.com/offchainlabs/nitro/util/arbmath" "github.com/offchainlabs/nitro/util/stopwaiter" + "github.com/offchainlabs/nitro/validator" ) var ( @@ -186,10 +188,15 @@ type nodeAndHash struct { hash common.Hash } +type LatestStakedNotifier interface { + UpdateLatestStaked(count arbutil.MessageIndex, globalState validator.GoGlobalState) +} + type Staker struct { *L1Validator stopwaiter.StopWaiter l1Reader L1ReaderInterface + notifiers []LatestStakedNotifier activeChallenge *ChallengeManager baseCallOpts bind.CallOpts config L1ValidatorConfig @@ -209,6 +216,7 @@ func NewStaker( config L1ValidatorConfig, blockValidator *BlockValidator, statelessBlockValidator *StatelessBlockValidator, + notifiers []LatestStakedNotifier, validatorUtilsAddress common.Address, fatalErr chan<- error, ) (*Staker, error) { @@ -226,6 +234,7 @@ func NewStaker( return &Staker{ L1Validator: val, l1Reader: l1Reader, + notifiers: notifiers, baseCallOpts: callOpts, config: config, 
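// Illustrative note (assumed example, not part of the patch): anything implementing
// LatestStakedNotifier can be appended to the notifiers slice passed to NewStaker, and
// checkLatestStaked fans the latest staked position out to every registered notifier.
// A minimal notifier, using only packages already referenced in this file, could look like:
//
//	type stakedLogger struct{}
//
//	func (stakedLogger) UpdateLatestStaked(count arbutil.MessageIndex, gs validator.GoGlobalState) {
//		log.Info("latest staked advanced", "count", count, "batch", gs.Batch, "posInBatch", gs.PosInBatch)
//	}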
highGasBlocksBuffer: big.NewInt(config.L1PostingStrategy.HighGasDelayBlocks), @@ -294,8 +303,22 @@ func (s *Staker) checkLatestStaked(ctx context.Context) error { log.Info("latest valid not yet in our node", "staked", stakedGlobalState) return nil } + + processedCount, err := s.txStreamer.GetProcessedMessageCount() + if err != nil { + return err + } + + if processedCount < count { + log.Info("execution catching up to last validated", "validatedCount", count, "processedCount", processedCount) + return nil + } + if s.blockValidator != nil && s.config.StartFromStaked { - return s.blockValidator.AssumeValid(count, stakedGlobalState) + s.blockValidator.UpdateLatestStaked(count, stakedGlobalState) + } + for _, notifier := range s.notifiers { + notifier.UpdateLatestStaked(count, stakedGlobalState) } return nil } diff --git a/system_tests/staker_test.go b/system_tests/staker_test.go index aa1ae0a64e..7a3ae41814 100644 --- a/system_tests/staker_test.go +++ b/system_tests/staker_test.go @@ -165,6 +165,7 @@ func stakerTestImpl(t *testing.T, faultyStaker bool, honestStakerInactive bool) valConfig, nil, statelessA, + nil, l2nodeA.DeployInfo.ValidatorUtils, nil, ) @@ -199,6 +200,7 @@ func stakerTestImpl(t *testing.T, faultyStaker bool, honestStakerInactive bool) valConfig, nil, statelessB, + nil, l2nodeB.DeployInfo.ValidatorUtils, nil, ) @@ -220,6 +222,7 @@ func stakerTestImpl(t *testing.T, faultyStaker bool, honestStakerInactive bool) valConfig, nil, statelessA, + nil, l2nodeA.DeployInfo.ValidatorUtils, nil, ) From 63df5aefbf36fefb19e58d45b9701c50cd3e9974 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Fri, 30 Jun 2023 16:26:21 -0600 Subject: [PATCH 50/63] message_pruner: min-batches-left --- arbnode/message_pruner.go | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/arbnode/message_pruner.go b/arbnode/message_pruner.go index 07ca12e3f2..1c2191f661 100644 --- a/arbnode/message_pruner.go +++ b/arbnode/message_pruner.go @@ -37,6 +37,7 @@ type MessagePrunerConfig struct { Enable bool `koanf:"enable"` MessagePruneInterval time.Duration `koanf:"prune-interval" reload:"hot"` SearchBatchReportLimit int64 `koanf:"search-batch-report" reload:"hot"` + MinBatchesLeft uint64 `koanf:"min-batches-left" reload:"hot"` } type MessagePrunerConfigFetcher func() *MessagePrunerConfig @@ -45,12 +46,14 @@ var DefaultMessagePrunerConfig = MessagePrunerConfig{ Enable: true, MessagePruneInterval: time.Minute, SearchBatchReportLimit: 100000, + MinBatchesLeft: 2, } func MessagePrunerConfigAddOptions(prefix string, f *flag.FlagSet) { f.Bool(prefix+".enable", DefaultMessagePrunerConfig.Enable, "enable message pruning") f.Duration(prefix+".prune-interval", DefaultMessagePrunerConfig.MessagePruneInterval, "interval for running message pruner") f.Int64(prefix+"search-batch-report", DefaultMessagePrunerConfig.SearchBatchReportLimit, "limit for searching for a batch report when pruning (negative disables)") + f.Uint64(prefix+"min-batches-left", DefaultMessagePrunerConfig.MinBatchesLeft, "min number of batches not pruned") } func NewMessagePruner(transactionStreamer *TransactionStreamer, inboxTracker *InboxTracker, config MessagePrunerConfigFetcher) *MessagePruner { @@ -157,11 +160,22 @@ func (m *MessagePruner) findBatchReport(ctx context.Context, delayedMsgStart uin } func (m *MessagePruner) prune(ctx context.Context, count arbutil.MessageIndex, globalState validator.GoGlobalState) error { - endBatchCount := globalState.Batch - if endBatchCount == 0 { + trimBatchCount := globalState.Batch 
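// Illustrative note on the intended bound (worked example, not part of the patch):
// MinBatchesLeft caps pruning so that at least that many batches stay unpruned, i.e.
// trimBatchCount is clamped to at most batchCount-MinBatchesLeft. With batchCount=10 and
// MinBatchesLeft=2, trimBatchCount is capped at 8; if fewer than MinBatchesLeft batches
// exist, nothing is pruned. (A later patch in this series corrects the comparison that
// guards this clamp.)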
+ minBatchesLeft := m.config().MinBatchesLeft + if trimBatchCount < minBatchesLeft { return nil } - endBatchMetadata, err := m.inboxTracker.GetBatchMetadata(endBatchCount - 1) + batchCount, err := m.inboxTracker.GetBatchCount() + if err != nil { + return err + } + if trimBatchCount+minBatchesLeft < batchCount { + if batchCount < minBatchesLeft { + return nil + } + trimBatchCount = batchCount - minBatchesLeft + } + endBatchMetadata, err := m.inboxTracker.GetBatchMetadata(trimBatchCount - 1) if err != nil { return err } @@ -172,10 +186,10 @@ func (m *MessagePruner) prune(ctx context.Context, count arbutil.MessageIndex, g if err != nil { return fmt.Errorf("failed finding batch report: %w", err) } - if batchPruneLimit < endBatchCount { - endBatchCount = batchPruneLimit + if batchPruneLimit < trimBatchCount { + trimBatchCount = batchPruneLimit } - return deleteOldMessageFromDB(ctx, endBatchCount, msgCount, delayedCount, m.inboxTracker.db, m.transactionStreamer.db) + return deleteOldMessageFromDB(ctx, trimBatchCount, msgCount, delayedCount, m.inboxTracker.db, m.transactionStreamer.db) } func deleteOldMessageFromDB(ctx context.Context, endBatchCount uint64, messageCount arbutil.MessageIndex, delayedMessageCount uint64, inboxTrackerDb ethdb.Database, transactionStreamerDb ethdb.Database) error { From c09cd9e956bdccb9a1bb88a7c4bd2838b4785e2f Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Fri, 30 Jun 2023 17:16:44 -0600 Subject: [PATCH 51/63] pruner: fix config options --- arbnode/message_pruner.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arbnode/message_pruner.go b/arbnode/message_pruner.go index 1c2191f661..a12d824e58 100644 --- a/arbnode/message_pruner.go +++ b/arbnode/message_pruner.go @@ -52,8 +52,8 @@ var DefaultMessagePrunerConfig = MessagePrunerConfig{ func MessagePrunerConfigAddOptions(prefix string, f *flag.FlagSet) { f.Bool(prefix+".enable", DefaultMessagePrunerConfig.Enable, "enable message pruning") f.Duration(prefix+".prune-interval", DefaultMessagePrunerConfig.MessagePruneInterval, "interval for running message pruner") - f.Int64(prefix+"search-batch-report", DefaultMessagePrunerConfig.SearchBatchReportLimit, "limit for searching for a batch report when pruning (negative disables)") - f.Uint64(prefix+"min-batches-left", DefaultMessagePrunerConfig.MinBatchesLeft, "min number of batches not pruned") + f.Int64(prefix+".search-batch-report", DefaultMessagePrunerConfig.SearchBatchReportLimit, "limit for searching for a batch report when pruning (negative disables)") + f.Uint64(prefix+".min-batches-left", DefaultMessagePrunerConfig.MinBatchesLeft, "min number of batches not pruned") } func NewMessagePruner(transactionStreamer *TransactionStreamer, inboxTracker *InboxTracker, config MessagePrunerConfigFetcher) *MessagePruner { From e193ac05b4aee994e29902c7c6d6a5c5846f73d0 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Wed, 5 Jul 2023 19:51:56 -0600 Subject: [PATCH 52/63] calliterativelywith: avoid overhead for duration 0 --- go-ethereum | 2 +- util/stopwaiter/stopwaiter.go | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/go-ethereum b/go-ethereum index f7609eef4b..8e6a8ad494 160000 --- a/go-ethereum +++ b/go-ethereum @@ -1 +1 @@ -Subproject commit f7609eef4bbd3a500292d12eed03956ffc5d6527 +Subproject commit 8e6a8ad4942591011e833e6ebceca6bd668f3db0 diff --git a/util/stopwaiter/stopwaiter.go b/util/stopwaiter/stopwaiter.go index 28370b9f28..0accf1503e 100644 --- a/util/stopwaiter/stopwaiter.go +++ b/util/stopwaiter/stopwaiter.go @@ 
-235,6 +235,9 @@ func CallIterativelyWith[T any]( if ctx.Err() != nil { return } + if interval == time.Duration(0) { + continue + } val = defaultVal timer := time.NewTimer(interval) select { From 947f8f7fb802b85357df15d4247348524ce7e6b2 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Wed, 5 Jul 2023 20:07:05 -0600 Subject: [PATCH 53/63] CallIterativelyWith: fix trigger val if duration is 0 --- util/stopwaiter/stopwaiter.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/stopwaiter/stopwaiter.go b/util/stopwaiter/stopwaiter.go index 0accf1503e..1e70e328eb 100644 --- a/util/stopwaiter/stopwaiter.go +++ b/util/stopwaiter/stopwaiter.go @@ -235,10 +235,10 @@ func CallIterativelyWith[T any]( if ctx.Err() != nil { return } + val = defaultVal if interval == time.Duration(0) { continue } - val = defaultVal timer := time.NewTimer(interval) select { case <-ctx.Done(): From 5b18e37b30519a7f6c3842a16909225c1bec06c4 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Thu, 6 Jul 2023 14:12:15 -0600 Subject: [PATCH 54/63] message pruner: fix bug checking if enough batches left --- arbnode/message_pruner.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arbnode/message_pruner.go b/arbnode/message_pruner.go index a12d824e58..beac143762 100644 --- a/arbnode/message_pruner.go +++ b/arbnode/message_pruner.go @@ -169,7 +169,7 @@ func (m *MessagePruner) prune(ctx context.Context, count arbutil.MessageIndex, g if err != nil { return err } - if trimBatchCount+minBatchesLeft < batchCount { + if trimBatchCount+minBatchesLeft > batchCount { if batchCount < minBatchesLeft { return nil } From 89fda395f199cf85cececb7854583029bc5466fc Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Thu, 6 Jul 2023 14:57:00 -0600 Subject: [PATCH 55/63] pruner: minor fixes following review comments --- arbnode/message_pruner.go | 58 ++++++++++----------------------------- 1 file changed, 14 insertions(+), 44 deletions(-) diff --git a/arbnode/message_pruner.go b/arbnode/message_pruner.go index beac143762..0991426625 100644 --- a/arbnode/message_pruner.go +++ b/arbnode/message_pruner.go @@ -36,7 +36,7 @@ type MessagePruner struct { type MessagePrunerConfig struct { Enable bool `koanf:"enable"` MessagePruneInterval time.Duration `koanf:"prune-interval" reload:"hot"` - SearchBatchReportLimit int64 `koanf:"search-batch-report" reload:"hot"` + SearchBatchReportLimit uint64 `koanf:"search-batch-report" reload:"hot"` MinBatchesLeft uint64 `koanf:"min-batches-left" reload:"hot"` } @@ -52,7 +52,7 @@ var DefaultMessagePrunerConfig = MessagePrunerConfig{ func MessagePrunerConfigAddOptions(prefix string, f *flag.FlagSet) { f.Bool(prefix+".enable", DefaultMessagePrunerConfig.Enable, "enable message pruning") f.Duration(prefix+".prune-interval", DefaultMessagePrunerConfig.MessagePruneInterval, "interval for running message pruner") - f.Int64(prefix+".search-batch-report", DefaultMessagePrunerConfig.SearchBatchReportLimit, "limit for searching for a batch report when pruning (negative disables)") + f.Uint64(prefix+".search-batch-report", DefaultMessagePrunerConfig.SearchBatchReportLimit, "limit for searching for a batch report when pruning (0 disables)") f.Uint64(prefix+".min-batches-left", DefaultMessagePrunerConfig.MinBatchesLeft, "min number of batches not pruned") } @@ -81,7 +81,7 @@ func (m *MessagePruner) UpdateLatestStaked(count arbutil.MessageIndex, globalSta err := m.LaunchThreadSafe(func(ctx context.Context) { defer m.pruningLock.Unlock() err := m.prune(ctx, count, globalState) - if err != nil 
{ + if err != nil && ctx.Err() == nil { log.Error("error while pruning", "err", err) } }) @@ -95,8 +95,8 @@ func (m *MessagePruner) UpdateLatestStaked(count arbutil.MessageIndex, globalSta // returns number of batch for which report was found (meaning - it should not be pruned) // if not found - returns maxUint64 (no limit on pruning) func (m *MessagePruner) findBatchReport(ctx context.Context, delayedMsgStart uint64) (uint64, error) { - searchLimit := m.config().SearchBatchReportLimit - if searchLimit < 0 { + searchLimitCfg := m.config().SearchBatchReportLimit + if searchLimitCfg == 0 { return math.MaxUint64, nil } delayedCount, err := m.inboxTracker.GetDelayedCount() @@ -106,12 +106,11 @@ func (m *MessagePruner) findBatchReport(ctx context.Context, delayedMsgStart uin if delayedCount <= delayedMsgStart { return 0, errors.New("delayedCount behind pruning target") } - searchUpTil := delayedCount - searchUpLimit := delayedMsgStart + uint64(searchLimit) - if searchLimit > 0 && searchUpLimit < searchUpTil { - searchUpTil = searchUpLimit + searchLimit := delayedMsgStart + searchLimitCfg + if searchLimit < delayedCount { + searchLimit = delayedCount } - for delayed := delayedMsgStart; delayed < searchUpTil; delayed++ { + for delayed := delayedMsgStart; delayed < searchLimit; delayed++ { if ctx.Err() != nil { return 0, ctx.Err() } @@ -127,54 +126,25 @@ func (m *MessagePruner) findBatchReport(ctx context.Context, delayedMsgStart uin return batchNum, nil } } - searchDownLimit := uint64(0) - if searchLimit > 0 { - searchedUp := searchUpTil - delayedMsgStart - limitRemaining := uint64(searchLimit) - searchedUp - if limitRemaining < delayedMsgStart { - searchDownLimit = delayedMsgStart - limitRemaining - } - } - for delayed := delayedMsgStart - 1; delayed >= searchDownLimit; delayed-- { - if ctx.Err() != nil { - return 0, ctx.Err() - } - msg, err := m.inboxTracker.GetDelayedMessage(delayed) - if errors.Is(err, AccumulatorNotFoundErr) { - // older delayed probably pruned - assume we won't find a report - return math.MaxUint64, nil - } - if err != nil { - return 0, err - } - if msg.Header.Kind == arbostypes.L1MessageType_BatchPostingReport { - _, _, _, batchNum, _, _, err := arbostypes.ParseBatchPostingReportMessageFields(bytes.NewReader(msg.L2msg)) - if err != nil { - return 0, fmt.Errorf("trying to parse batch-posting report: %w", err) - } - // found below delayedMessage - so batchnum can be pruned but above it cannot - return batchNum + 1, nil - } - } - return math.MaxUint64, nil + return 0, errors.New("Batch post report not found. 
Try adjusting search-batch-report") } func (m *MessagePruner) prune(ctx context.Context, count arbutil.MessageIndex, globalState validator.GoGlobalState) error { trimBatchCount := globalState.Batch minBatchesLeft := m.config().MinBatchesLeft - if trimBatchCount < minBatchesLeft { - return nil - } batchCount, err := m.inboxTracker.GetBatchCount() if err != nil { return err } - if trimBatchCount+minBatchesLeft > batchCount { + if batchCount < trimBatchCount+minBatchesLeft { if batchCount < minBatchesLeft { return nil } trimBatchCount = batchCount - minBatchesLeft } + if trimBatchCount < 1 { + return nil + } endBatchMetadata, err := m.inboxTracker.GetBatchMetadata(trimBatchCount - 1) if err != nil { return err From e7030af045b047a50e554b978f83fb772d41aa3a Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Thu, 6 Jul 2023 15:00:47 -0600 Subject: [PATCH 56/63] staker: move block_validator into notifiers --- staker/staker.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/staker/staker.go b/staker/staker.go index f48290af36..f20a045d37 100644 --- a/staker/staker.go +++ b/staker/staker.go @@ -231,6 +231,9 @@ func NewStaker( return nil, err } stakerLastSuccessfulActionGauge.Update(time.Now().Unix()) + if config.StartFromStaked { + notifiers = append(notifiers, blockValidator) + } return &Staker{ L1Validator: val, l1Reader: l1Reader, @@ -314,9 +317,6 @@ func (s *Staker) checkLatestStaked(ctx context.Context) error { return nil } - if s.blockValidator != nil && s.config.StartFromStaked { - s.blockValidator.UpdateLatestStaked(count, stakedGlobalState) - } for _, notifier := range s.notifiers { notifier.UpdateLatestStaked(count, stakedGlobalState) } From b56c734dc9011c970b2c080d49cf896b05bcc98d Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Thu, 6 Jul 2023 18:16:03 -0600 Subject: [PATCH 57/63] update geth, add recordingDb config --- arbnode/execution/block_recorder.go | 4 +-- arbnode/execution/node.go | 3 +- arbnode/node.go | 49 +++++++++++++++-------------- go-ethereum | 2 +- 4 files changed, 31 insertions(+), 27 deletions(-) diff --git a/arbnode/execution/block_recorder.go b/arbnode/execution/block_recorder.go index 79da88821c..dc5daa6f7b 100644 --- a/arbnode/execution/block_recorder.go +++ b/arbnode/execution/block_recorder.go @@ -46,10 +46,10 @@ type RecordResult struct { BatchInfo []validator.BatchInfo } -func NewBlockRecorder(execEngine *ExecutionEngine, ethDb ethdb.Database) *BlockRecorder { +func NewBlockRecorder(config *arbitrum.RecordingDatabaseConfig, execEngine *ExecutionEngine, ethDb ethdb.Database) *BlockRecorder { recorder := &BlockRecorder{ execEngine: execEngine, - recordingDatabase: arbitrum.NewRecordingDatabase(ethDb, execEngine.bc), + recordingDatabase: arbitrum.NewRecordingDatabase(config, ethDb, execEngine.bc), } execEngine.SetRecorder(recorder) return recorder diff --git a/arbnode/execution/node.go b/arbnode/execution/node.go index 8b9cdd7538..7456c1d6a7 100644 --- a/arbnode/execution/node.go +++ b/arbnode/execution/node.go @@ -31,6 +31,7 @@ func CreateExecutionNode( fwTarget string, fwConfig *ForwarderConfig, rpcConfig arbitrum.Config, + recordingDbConfig *arbitrum.RecordingDatabaseConfig, seqConfigFetcher SequencerConfigFetcher, precheckConfigFetcher TxPreCheckerConfigFetcher, ) (*ExecutionNode, error) { @@ -38,7 +39,7 @@ func CreateExecutionNode( if err != nil { return nil, err } - recorder := NewBlockRecorder(execEngine, chainDB) + recorder := NewBlockRecorder(recordingDbConfig, execEngine, chainDB) var txPublisher TransactionPublisher var 
sequencer *Sequencer seqConfig := seqConfigFetcher() diff --git a/arbnode/node.go b/arbnode/node.go index c559eb6298..bf0b5e2db9 100644 --- a/arbnode/node.go +++ b/arbnode/node.go @@ -307,28 +307,29 @@ func DeployOnL1(ctx context.Context, l1client arbutil.L1Interface, deployAuth *b } type Config struct { - RPC arbitrum.Config `koanf:"rpc"` - Sequencer execution.SequencerConfig `koanf:"sequencer" reload:"hot"` - L1Reader headerreader.Config `koanf:"parent-chain-reader" reload:"hot"` - InboxReader InboxReaderConfig `koanf:"inbox-reader" reload:"hot"` - DelayedSequencer DelayedSequencerConfig `koanf:"delayed-sequencer" reload:"hot"` - BatchPoster BatchPosterConfig `koanf:"batch-poster" reload:"hot"` - MessagePruner MessagePrunerConfig `koanf:"message-pruner" reload:"hot"` - ForwardingTargetImpl string `koanf:"forwarding-target"` - Forwarder execution.ForwarderConfig `koanf:"forwarder"` - TxPreChecker execution.TxPreCheckerConfig `koanf:"tx-pre-checker" reload:"hot"` - BlockValidator staker.BlockValidatorConfig `koanf:"block-validator" reload:"hot"` - Feed broadcastclient.FeedConfig `koanf:"feed" reload:"hot"` - Staker staker.L1ValidatorConfig `koanf:"staker"` - SeqCoordinator SeqCoordinatorConfig `koanf:"seq-coordinator"` - DataAvailability das.DataAvailabilityConfig `koanf:"data-availability"` - SyncMonitor SyncMonitorConfig `koanf:"sync-monitor"` - Dangerous DangerousConfig `koanf:"dangerous"` - Caching execution.CachingConfig `koanf:"caching"` - Archive bool `koanf:"archive"` - TxLookupLimit uint64 `koanf:"tx-lookup-limit"` - TransactionStreamer TransactionStreamerConfig `koanf:"transaction-streamer" reload:"hot"` - Maintenance MaintenanceConfig `koanf:"maintenance" reload:"hot"` + RPC arbitrum.Config `koanf:"rpc"` + Sequencer execution.SequencerConfig `koanf:"sequencer" reload:"hot"` + L1Reader headerreader.Config `koanf:"parent-chain-reader" reload:"hot"` + InboxReader InboxReaderConfig `koanf:"inbox-reader" reload:"hot"` + DelayedSequencer DelayedSequencerConfig `koanf:"delayed-sequencer" reload:"hot"` + BatchPoster BatchPosterConfig `koanf:"batch-poster" reload:"hot"` + MessagePruner MessagePrunerConfig `koanf:"message-pruner" reload:"hot"` + ForwardingTargetImpl string `koanf:"forwarding-target"` + Forwarder execution.ForwarderConfig `koanf:"forwarder"` + TxPreChecker execution.TxPreCheckerConfig `koanf:"tx-pre-checker" reload:"hot"` + BlockValidator staker.BlockValidatorConfig `koanf:"block-validator" reload:"hot"` + RecordingDB arbitrum.RecordingDatabaseConfig `koanf:"recording-database"` + Feed broadcastclient.FeedConfig `koanf:"feed" reload:"hot"` + Staker staker.L1ValidatorConfig `koanf:"staker"` + SeqCoordinator SeqCoordinatorConfig `koanf:"seq-coordinator"` + DataAvailability das.DataAvailabilityConfig `koanf:"data-availability"` + SyncMonitor SyncMonitorConfig `koanf:"sync-monitor"` + Dangerous DangerousConfig `koanf:"dangerous"` + Caching execution.CachingConfig `koanf:"caching"` + Archive bool `koanf:"archive"` + TxLookupLimit uint64 `koanf:"tx-lookup-limit"` + TransactionStreamer TransactionStreamerConfig `koanf:"transaction-streamer" reload:"hot"` + Maintenance MaintenanceConfig `koanf:"maintenance" reload:"hot"` } func (c *Config) Validate() error { @@ -392,6 +393,7 @@ func ConfigAddOptions(prefix string, f *flag.FlagSet, feedInputEnable bool, feed execution.AddOptionsForNodeForwarderConfig(prefix+".forwarder", f) execution.TxPreCheckerConfigAddOptions(prefix+".tx-pre-checker", f) staker.BlockValidatorConfigAddOptions(prefix+".block-validator", f) + 
arbitrum.RecordingDatabaseConfigAddOptions(prefix+".recording-database", f) broadcastclient.FeedConfigAddOptions(prefix+".feed", f, feedInputEnable, feedOutputEnable) staker.L1ValidatorConfigAddOptions(prefix+".staker", f) SeqCoordinatorConfigAddOptions(prefix+".seq-coordinator", f) @@ -418,6 +420,7 @@ var ConfigDefault = Config{ ForwardingTargetImpl: "", TxPreChecker: execution.DefaultTxPreCheckerConfig, BlockValidator: staker.DefaultBlockValidatorConfig, + RecordingDB: arbitrum.DefaultRecordingDatabaseConfig, Feed: broadcastclient.FeedConfigDefault, Staker: staker.DefaultL1ValidatorConfig, SeqCoordinator: DefaultSeqCoordinatorConfig, @@ -605,7 +608,7 @@ func createNodeImpl( sequencerConfigFetcher := func() *execution.SequencerConfig { return &configFetcher.Get().Sequencer } txprecheckConfigFetcher := func() *execution.TxPreCheckerConfig { return &configFetcher.Get().TxPreChecker } exec, err := execution.CreateExecutionNode(stack, chainDb, l2BlockChain, l1Reader, syncMonitor, - config.ForwardingTarget(), &config.Forwarder, config.RPC, + config.ForwardingTarget(), &config.Forwarder, config.RPC, &config.RecordingDB, sequencerConfigFetcher, txprecheckConfigFetcher) if err != nil { return nil, err diff --git a/go-ethereum b/go-ethereum index 8e6a8ad494..704d1b80c8 160000 --- a/go-ethereum +++ b/go-ethereum @@ -1 +1 @@ -Subproject commit 8e6a8ad4942591011e833e6ebceca6bd668f3db0 +Subproject commit 704d1b80c8ffe2d29d18a07d37f3709425a489cc From 0f3eb15282be71bd2f64732d2b1f97afdcb6e37d Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Thu, 6 Jul 2023 19:49:54 -0600 Subject: [PATCH 58/63] message_pruner: dont prune batchmetadata --- arbnode/message_pruner.go | 77 ++++---------------------------- arbnode/message_pruner_test.go | 80 ++++++++++++++-------------------- 2 files changed, 41 insertions(+), 116 deletions(-) diff --git a/arbnode/message_pruner.go b/arbnode/message_pruner.go index 0991426625..aeee07ca73 100644 --- a/arbnode/message_pruner.go +++ b/arbnode/message_pruner.go @@ -7,16 +7,13 @@ import ( "bytes" "context" "encoding/binary" - "errors" "fmt" "sync" "time" - "github.com/ethereum/go-ethereum/common/math" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" - "github.com/offchainlabs/nitro/arbos/arbostypes" "github.com/offchainlabs/nitro/arbutil" "github.com/offchainlabs/nitro/util/stopwaiter" "github.com/offchainlabs/nitro/validator" @@ -34,25 +31,22 @@ type MessagePruner struct { } type MessagePrunerConfig struct { - Enable bool `koanf:"enable"` - MessagePruneInterval time.Duration `koanf:"prune-interval" reload:"hot"` - SearchBatchReportLimit uint64 `koanf:"search-batch-report" reload:"hot"` - MinBatchesLeft uint64 `koanf:"min-batches-left" reload:"hot"` + Enable bool `koanf:"enable"` + MessagePruneInterval time.Duration `koanf:"prune-interval" reload:"hot"` + MinBatchesLeft uint64 `koanf:"min-batches-left" reload:"hot"` } type MessagePrunerConfigFetcher func() *MessagePrunerConfig var DefaultMessagePrunerConfig = MessagePrunerConfig{ - Enable: true, - MessagePruneInterval: time.Minute, - SearchBatchReportLimit: 100000, - MinBatchesLeft: 2, + Enable: true, + MessagePruneInterval: time.Minute, + MinBatchesLeft: 2, } func MessagePrunerConfigAddOptions(prefix string, f *flag.FlagSet) { f.Bool(prefix+".enable", DefaultMessagePrunerConfig.Enable, "enable message pruning") f.Duration(prefix+".prune-interval", DefaultMessagePrunerConfig.MessagePruneInterval, "interval for running message pruner") - f.Uint64(prefix+".search-batch-report", 
DefaultMessagePrunerConfig.SearchBatchReportLimit, "limit for searching for a batch report when pruning (0 disables)") f.Uint64(prefix+".min-batches-left", DefaultMessagePrunerConfig.MinBatchesLeft, "min number of batches not pruned") } @@ -91,44 +85,6 @@ func (m *MessagePruner) UpdateLatestStaked(count arbutil.MessageIndex, globalSta } } -// looks for batch posting report starting from delayed message delayedMsgStart -// returns number of batch for which report was found (meaning - it should not be pruned) -// if not found - returns maxUint64 (no limit on pruning) -func (m *MessagePruner) findBatchReport(ctx context.Context, delayedMsgStart uint64) (uint64, error) { - searchLimitCfg := m.config().SearchBatchReportLimit - if searchLimitCfg == 0 { - return math.MaxUint64, nil - } - delayedCount, err := m.inboxTracker.GetDelayedCount() - if err != nil { - return 0, err - } - if delayedCount <= delayedMsgStart { - return 0, errors.New("delayedCount behind pruning target") - } - searchLimit := delayedMsgStart + searchLimitCfg - if searchLimit < delayedCount { - searchLimit = delayedCount - } - for delayed := delayedMsgStart; delayed < searchLimit; delayed++ { - if ctx.Err() != nil { - return 0, ctx.Err() - } - msg, err := m.inboxTracker.GetDelayedMessage(delayed) - if err != nil { - return 0, err - } - if msg.Header.Kind == arbostypes.L1MessageType_BatchPostingReport { - _, _, _, batchNum, _, _, err := arbostypes.ParseBatchPostingReportMessageFields(bytes.NewReader(msg.L2msg)) - if err != nil { - return 0, fmt.Errorf("trying to parse batch-posting report: %w", err) - } - return batchNum, nil - } - } - return 0, errors.New("Batch post report not found. Try adjusting search-batch-report") -} - func (m *MessagePruner) prune(ctx context.Context, count arbutil.MessageIndex, globalState validator.GoGlobalState) error { trimBatchCount := globalState.Batch minBatchesLeft := m.config().MinBatchesLeft @@ -152,26 +108,11 @@ func (m *MessagePruner) prune(ctx context.Context, count arbutil.MessageIndex, g msgCount := endBatchMetadata.MessageCount delayedCount := endBatchMetadata.DelayedMessageCount - batchPruneLimit, err := m.findBatchReport(ctx, delayedCount) - if err != nil { - return fmt.Errorf("failed finding batch report: %w", err) - } - if batchPruneLimit < trimBatchCount { - trimBatchCount = batchPruneLimit - } - return deleteOldMessageFromDB(ctx, trimBatchCount, msgCount, delayedCount, m.inboxTracker.db, m.transactionStreamer.db) + return deleteOldMessageFromDB(ctx, msgCount, delayedCount, m.inboxTracker.db, m.transactionStreamer.db) } -func deleteOldMessageFromDB(ctx context.Context, endBatchCount uint64, messageCount arbutil.MessageIndex, delayedMessageCount uint64, inboxTrackerDb ethdb.Database, transactionStreamerDb ethdb.Database) error { - prunedKeysRange, err := deleteFromLastPrunedUptoEndKey(ctx, inboxTrackerDb, sequencerBatchMetaPrefix, endBatchCount) - if err != nil { - return fmt.Errorf("error deleting batch metadata: %w", err) - } - if len(prunedKeysRange) > 0 { - log.Info("Pruned batches:", "first pruned key", prunedKeysRange[0], "last pruned key", prunedKeysRange[len(prunedKeysRange)-1]) - } - - prunedKeysRange, err = deleteFromLastPrunedUptoEndKey(ctx, transactionStreamerDb, messagePrefix, uint64(messageCount)) +func deleteOldMessageFromDB(ctx context.Context, messageCount arbutil.MessageIndex, delayedMessageCount uint64, inboxTrackerDb ethdb.Database, transactionStreamerDb ethdb.Database) error { + prunedKeysRange, err := deleteFromLastPrunedUptoEndKey(ctx, transactionStreamerDb, 
messagePrefix, uint64(messageCount)) if err != nil { return fmt.Errorf("error deleting last batch messages: %w", err) } diff --git a/arbnode/message_pruner_test.go b/arbnode/message_pruner_test.go index 4c09aa8c6b..c0cb2cb4fe 100644 --- a/arbnode/message_pruner_test.go +++ b/arbnode/message_pruner_test.go @@ -9,24 +9,20 @@ import ( "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/ethdb" + "github.com/offchainlabs/nitro/arbutil" ) func TestMessagePrunerWithPruningEligibleMessagePresent(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - endBatchCount := uint64(2 * 100 * 1024) - endBatchMetadata := BatchMetadata{ - MessageCount: 2 * 100 * 1024, - DelayedMessageCount: 2 * 100 * 1024, - } - inboxTrackerDb, transactionStreamerDb := setupDatabase(t, endBatchCount, endBatchMetadata) - err := deleteOldMessageFromDB(ctx, endBatchCount, endBatchMetadata.MessageCount, endBatchMetadata.DelayedMessageCount, inboxTrackerDb, transactionStreamerDb) + messagesCount := uint64(2 * 100 * 1024) + inboxTrackerDb, transactionStreamerDb := setupDatabase(t, 2*100*1024, 2*100*1024) + err := deleteOldMessageFromDB(ctx, arbutil.MessageIndex(messagesCount), messagesCount, inboxTrackerDb, transactionStreamerDb) Require(t, err) - checkDbKeys(t, endBatchCount, inboxTrackerDb, sequencerBatchMetaPrefix) - checkDbKeys(t, uint64(endBatchMetadata.MessageCount), transactionStreamerDb, messagePrefix) - checkDbKeys(t, endBatchMetadata.DelayedMessageCount, inboxTrackerDb, rlpDelayedMessagePrefix) + checkDbKeys(t, messagesCount, transactionStreamerDb, messagePrefix) + checkDbKeys(t, messagesCount, inboxTrackerDb, rlpDelayedMessagePrefix) } @@ -34,39 +30,35 @@ func TestMessagePrunerTraverseEachMessageOnlyOnce(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - endBatchCount := uint64(10) - endBatchMetadata := BatchMetadata{} - inboxTrackerDb, transactionStreamerDb := setupDatabase(t, endBatchCount, endBatchMetadata) - // In first iteration message till endBatchCount are tried to be deleted. - err := deleteOldMessageFromDB(ctx, endBatchCount, endBatchMetadata.MessageCount, endBatchMetadata.DelayedMessageCount, inboxTrackerDb, transactionStreamerDb) + messagesCount := uint64(10) + inboxTrackerDb, transactionStreamerDb := setupDatabase(t, messagesCount, messagesCount) + // In first iteration message till messagesCount are tried to be deleted. + err := deleteOldMessageFromDB(ctx, arbutil.MessageIndex(messagesCount), messagesCount, inboxTrackerDb, transactionStreamerDb) Require(t, err) - // In first iteration all the message till endBatchCount are deleted. - checkDbKeys(t, endBatchCount, inboxTrackerDb, sequencerBatchMetaPrefix) - // After first iteration endBatchCount/2 is reinserted in inbox db - err = inboxTrackerDb.Put(dbKey(sequencerBatchMetaPrefix, endBatchCount/2), []byte{}) + // After first iteration messagesCount/2 is reinserted in inbox db + err = inboxTrackerDb.Put(dbKey(messagePrefix, messagesCount/2), []byte{}) Require(t, err) - // In second iteration message till endBatchCount are again tried to be deleted. - err = deleteOldMessageFromDB(ctx, endBatchCount, endBatchMetadata.MessageCount, endBatchMetadata.DelayedMessageCount, inboxTrackerDb, transactionStreamerDb) + // In second iteration message till messagesCount are again tried to be deleted. 
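// Illustrative sketch of the key layout these tests rely on (dbKeySketch is a hypothetical
// stand-in for arbnode's dbKey helper): each entry is the table prefix followed by the index
// encoded as an 8-byte big-endian integer, matching the binary.BigEndian.Uint64 decoding
// used by the pruner's iterators.
//
//	func dbKeySketch(prefix []byte, n uint64) []byte {
//		var idx [8]byte
//		binary.BigEndian.PutUint64(idx[:], n)
//		return append(append([]byte{}, prefix...), idx[:]...)
//	}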
+ err = deleteOldMessageFromDB(ctx, arbutil.MessageIndex(messagesCount), messagesCount, inboxTrackerDb, transactionStreamerDb) Require(t, err) - // In second iteration all the message till endBatchCount are deleted again. - checkDbKeys(t, endBatchCount, inboxTrackerDb, sequencerBatchMetaPrefix) + // In second iteration all the message till messagesCount are deleted again. + checkDbKeys(t, messagesCount, transactionStreamerDb, messagePrefix) } func TestMessagePrunerPruneTillLessThenEqualTo(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - endBatchCount := uint64(10) - endBatchMetadata := BatchMetadata{} - inboxTrackerDb, transactionStreamerDb := setupDatabase(t, 2*endBatchCount, endBatchMetadata) - err := inboxTrackerDb.Delete(dbKey(sequencerBatchMetaPrefix, 9)) + messagesCount := uint64(10) + inboxTrackerDb, transactionStreamerDb := setupDatabase(t, 2*messagesCount, 20) + err := inboxTrackerDb.Delete(dbKey(messagePrefix, 9)) Require(t, err) - err = deleteOldMessageFromDB(ctx, endBatchCount, endBatchMetadata.MessageCount, endBatchMetadata.DelayedMessageCount, inboxTrackerDb, transactionStreamerDb) + err = deleteOldMessageFromDB(ctx, arbutil.MessageIndex(messagesCount), messagesCount, inboxTrackerDb, transactionStreamerDb) Require(t, err) - hasKey, err := inboxTrackerDb.Has(dbKey(sequencerBatchMetaPrefix, 10)) + hasKey, err := transactionStreamerDb.Has(dbKey(messagePrefix, messagesCount)) Require(t, err) if !hasKey { - Fail(t, "Key", 10, "with prefix", string(sequencerBatchMetaPrefix), "should be present after pruning") + Fail(t, "Key", 10, "with prefix", string(messagePrefix), "should be present after pruning") } } @@ -74,35 +66,26 @@ func TestMessagePrunerWithNoPruningEligibleMessagePresent(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - endBatchCount := uint64(2) - endBatchMetadata := BatchMetadata{ - MessageCount: 2, - DelayedMessageCount: 2, - } - inboxTrackerDb, transactionStreamerDb := setupDatabase(t, endBatchCount, endBatchMetadata) - err := deleteOldMessageFromDB(ctx, endBatchCount, endBatchMetadata.MessageCount, endBatchMetadata.DelayedMessageCount, inboxTrackerDb, transactionStreamerDb) + messagesCount := uint64(10) + inboxTrackerDb, transactionStreamerDb := setupDatabase(t, messagesCount, messagesCount) + err := deleteOldMessageFromDB(ctx, arbutil.MessageIndex(messagesCount), messagesCount, inboxTrackerDb, transactionStreamerDb) Require(t, err) - checkDbKeys(t, endBatchCount, inboxTrackerDb, sequencerBatchMetaPrefix) - checkDbKeys(t, uint64(endBatchMetadata.MessageCount), transactionStreamerDb, messagePrefix) - checkDbKeys(t, endBatchMetadata.DelayedMessageCount, inboxTrackerDb, rlpDelayedMessagePrefix) + checkDbKeys(t, uint64(messagesCount), transactionStreamerDb, messagePrefix) + checkDbKeys(t, messagesCount, inboxTrackerDb, rlpDelayedMessagePrefix) } -func setupDatabase(t *testing.T, endBatchCount uint64, endBatchMetadata BatchMetadata) (ethdb.Database, ethdb.Database) { - inboxTrackerDb := rawdb.NewMemoryDatabase() - for i := uint64(0); i < endBatchCount; i++ { - err := inboxTrackerDb.Put(dbKey(sequencerBatchMetaPrefix, i), []byte{}) - Require(t, err) - } +func setupDatabase(t *testing.T, messageCount, delayedMessageCount uint64) (ethdb.Database, ethdb.Database) { transactionStreamerDb := rawdb.NewMemoryDatabase() - for i := uint64(0); i < uint64(endBatchMetadata.MessageCount); i++ { + for i := uint64(0); i < uint64(messageCount); i++ { err := transactionStreamerDb.Put(dbKey(messagePrefix, i), 
From 3b9153852f4108b5076b739466ba3eeeba27b723 Mon Sep 17 00:00:00 2001
From: Tsahi Zidenberg
Date: Thu, 6 Jul 2023 19:51:35 -0600
Subject: [PATCH 59/63] validator: don't warn when catching up

---
 staker/block_validator.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/staker/block_validator.go b/staker/block_validator.go
index f2671ebe44..a292dbd6f3 100644
--- a/staker/block_validator.go
+++ b/staker/block_validator.go
@@ -1059,7 +1059,7 @@ func (v *BlockValidator) checkValidatedGSCaughtUp() (bool, error) {
 			log.Error("failed reading processedMsgCount", "err", err)
 			processedMsgCount = 0
 		}
-		log.Warn("validator catching up to last valid", "lastValid.Batch", v.lastValidGS.Batch, "lastValid.PosInBatch", v.lastValidGS.PosInBatch, "batchCount", batchCount, "batchMsgCount", batchMsgCount, "processedMsgCount", processedMsgCount)
+		log.Info("validator catching up to last valid", "lastValid.Batch", v.lastValidGS.Batch, "lastValid.PosInBatch", v.lastValidGS.PosInBatch, "batchCount", batchCount, "batchMsgCount", batchMsgCount, "processedMsgCount", processedMsgCount)
 		return false, nil
 	}
 	msg, err := v.streamer.GetMessage(count - 1)

From 9be039780636c5b4a5978e5ae597241dd3e895fe Mon Sep 17 00:00:00 2001
From: Tsahi Zidenberg
Date: Fri, 7 Jul 2023 09:18:17 -0600
Subject: [PATCH 60/63] block_validator bugfix: delete validation entry when done

---
 staker/block_validator.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/staker/block_validator.go b/staker/block_validator.go
index a292dbd6f3..a08564f771 100644
--- a/staker/block_validator.go
+++ b/staker/block_validator.go
@@ -685,6 +685,7 @@ validationsLoop:
 			log.Error("failed writing new validated to database", "pos", pos, "err", err)
 		}
 		atomicStorePos(&v.validatedA, pos+1)
+		v.validations.Delete(pos)
 		nonBlockingTrigger(v.createNodesChan)
 		nonBlockingTrigger(v.sendRecordChan)
 		validatorMsgCountValidatedGauge.Update(int64(pos + 1))

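The one-line fix above matters because the validator keeps its in-flight entries in a position-keyed map (v.validations); once a position has been validated and the counters advance, the entry is no longer needed, so leaving it behind would grow the map for every position ever validated. A generic sketch of the store/consume/delete life cycle, using a sync.Map-style container and illustrative names rather than the validator's real types:

    // Sketch: track in-flight work keyed by position, and drop each entry as
    // soon as its result has been consumed so the map stays bounded.
    type inFlight struct {
    	entries sync.Map // uint64 position -> *status
    }

    type status struct{ done bool }

    func (f *inFlight) start(pos uint64) *status {
    	st := &status{}
    	f.entries.Store(pos, st)
    	return st
    }

    func (f *inFlight) finish(pos uint64) {
    	// The analogue of v.validations.Delete(pos) in the patch: omitting this
    	// call leaks one entry per validated position.
    	f.entries.Delete(pos)
    }
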
From 266948dba032b0c8aa6fc53a72ec5079b6bcf846 Mon Sep 17 00:00:00 2001
From: Tsahi Zidenberg
Date: Mon, 10 Jul 2023 09:15:30 -0600
Subject: [PATCH 61/63] block_validator: don't try to read non-existing batch

---
 staker/block_validator.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/staker/block_validator.go b/staker/block_validator.go
index a08564f771..9dad4fc3d7 100644
--- a/staker/block_validator.go
+++ b/staker/block_validator.go
@@ -429,7 +429,7 @@ func (v *BlockValidator) readBatch(ctx context.Context, batchNum uint64) (bool,
 	if err != nil {
 		return false, nil, 0, err
 	}
-	if batchCount < batchNum {
+	if batchCount <= batchNum {
 		return false, nil, 0, nil
 	}
 	batchMsgCount, err := v.inboxTracker.GetBatchMessageCount(batchNum)

From 67af3b3be549d4276558bc77360f847fd9e2193a Mon Sep 17 00:00:00 2001
From: Tsahi Zidenberg
Date: Mon, 10 Jul 2023 09:56:23 -0600
Subject: [PATCH 62/63] block_validator: fixing PR review comments

---
 staker/block_validator.go | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/staker/block_validator.go b/staker/block_validator.go
index 9dad4fc3d7..75f9fa7815 100644
--- a/staker/block_validator.go
+++ b/staker/block_validator.go
@@ -57,8 +57,8 @@ type BlockValidator struct {
 	// only from logger thread
 	lastValidInfoPrinted *GlobalStateValidatedInfo
 
-	// can be read by anyone holding reorg-read
-	// written by appropriate thread or reorg-write
+	// can be read (atomic.Load) by anyone holding reorg-read
+	// written (atomic.Set) by appropriate thread or (any way) holding reorg-write
 	createdA    uint64
 	recordSentA uint64
 	validatedA  uint64
@@ -678,12 +678,11 @@ validationsLoop:
 			}
 			validatorValidValidationsCounter.Inc(1)
 		}
-		v.lastValidGS = validationStatus.Entry.End
-		go v.recorder.MarkValid(pos, v.lastValidGS.BlockHash)
-		err := v.writeLastValidatedToDb(validationStatus.Entry.End, wasmRoots)
+		err := v.writeLastValidated(validationStatus.Entry.End, wasmRoots)
 		if err != nil {
 			log.Error("failed writing new validated to database", "pos", pos, "err", err)
 		}
+		go v.recorder.MarkValid(pos, v.lastValidGS.BlockHash)
 		atomicStorePos(&v.validatedA, pos+1)
 		v.validations.Delete(pos)
 		nonBlockingTrigger(v.createNodesChan)
@@ -760,7 +759,8 @@ func (v *BlockValidator) iterativeValidationProgress(ctx context.Context, ignore
 
 var ErrValidationCanceled = errors.New("validation of block cancelled")
 
-func (v *BlockValidator) writeLastValidatedToDb(gs validator.GoGlobalState, wasmRoots []common.Hash) error {
+func (v *BlockValidator) writeLastValidated(gs validator.GoGlobalState, wasmRoots []common.Hash) error {
+	v.lastValidGS = gs
 	info := GlobalStateValidatedInfo{
 		GlobalState: gs,
 		WasmRoots:   wasmRoots,
@@ -798,7 +798,7 @@ func (v *BlockValidator) validGSIsNew(globalState validator.GoGlobalState) bool
 // this accepts globalstate even if not caught up
 func (v *BlockValidator) InitAssumeValid(globalState validator.GoGlobalState) error {
 	if v.Started() {
-		return fmt.Errorf("cannot handle AssumeValid while running")
+		return fmt.Errorf("cannot handle InitAssumeValid while running")
 	}
 
 	// don't do anything if we already validated past that
@@ -807,9 +807,8 @@ func (v *BlockValidator) InitAssumeValid(globalState validator.GoGlobalState) er
 	}
 
 	v.legacyValidInfo = nil
-	v.lastValidGS = globalState
 
-	err := v.writeLastValidatedToDb(v.lastValidGS, []common.Hash{})
+	err := v.writeLastValidated(v.lastValidGS, nil)
 	if err != nil {
 		log.Error("failed writing new validated to database", "pos", v.lastValidGS, "err", err)
 	}
@@ -835,7 +834,7 @@ func (v *BlockValidator) UpdateLatestStaked(count arbutil.MessageIndex, globalSt
 		return
 	}
 	v.legacyValidInfo = nil
-	v.lastValidGS = globalState
+	v.writeLastValidated(globalState, nil)
 	return
 }
 
@@ -866,8 +865,7 @@ func (v *BlockValidator) UpdateLatestStaked(count arbutil.MessageIndex, globalSt
 	v.validatedA = countUint64
 	v.valLoopPos = count
 	validatorMsgCountValidatedGauge.Update(int64(countUint64))
-	v.lastValidGS = globalState
-	err = v.writeLastValidatedToDb(v.lastValidGS, []common.Hash{}) // we don't know which wasm roots were validated
+	err = v.writeLastValidated(v.lastValidGS, nil) // we don't know which wasm roots were validated
 	if err != nil {
 		log.Error("failed writing valid state after reorg", "err", err)
 	}
@@ -931,8 +929,7 @@ func (v *BlockValidator) Reorg(ctx context.Context, count arbutil.MessageIndex)
 	if v.validatedA > countUint64 {
 		v.validatedA = countUint64
 		validatorMsgCountValidatedGauge.Update(int64(countUint64))
-		v.lastValidGS = v.nextCreateStartGS
-		err := v.writeLastValidatedToDb(v.lastValidGS, []common.Hash{}) // we don't know which wasm roots were validated
+		err := v.writeLastValidated(v.lastValidGS, nil) // we don't know which wasm roots were validated
 		if err != nil {
 			log.Error("failed writing valid state after reorg", "err", err)
 		}
@@ -1005,13 +1002,13 @@ func (v *BlockValidator) checkLegacyValid() error {
 		log.Error("legacy validated blockHash does not fit chain", "info.BlockHash", v.legacyValidInfo.BlockHash, "chain", result.BlockHash, "count", msgCount)
 		return fmt.Errorf("legacy validated blockHash does not fit chain")
 	}
-	v.lastValidGS = validator.GoGlobalState{
+	validGS := validator.GoGlobalState{
 		BlockHash:  result.BlockHash,
 		SendRoot:   result.SendRoot,
 		Batch:      v.legacyValidInfo.AfterPosition.BatchNumber,
 		PosInBatch: v.legacyValidInfo.AfterPosition.PosInBatch,
 	}
-	err = v.writeLastValidatedToDb(v.lastValidGS, []common.Hash{})
+	err = v.writeLastValidated(validGS, nil)
 	if err == nil {
 		err = v.db.Delete(legacyLastBlockValidatedInfoKey)
 		if err != nil {

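Most of the churn in the patch above is one refactor: callers used to assign v.lastValidGS by hand and then separately persist it through writeLastValidatedToDb, and at least one call site only did the first step. Renaming the helper to writeLastValidated and moving the field assignment inside it keeps the in-memory and persisted views from drifting apart. A compressed sketch of the resulting shape follows; the receiver type, key constant, and encoding details are placeholders, not the actual code:

    // Sketch: one helper owns both the in-memory copy and the persisted record.
    var lastValidatedInfoKey = []byte("_lastValidatedInfo") // hypothetical key name

    type exampleValidator struct {
    	lastValidGS validator.GoGlobalState
    	db          ethdb.KeyValueWriter
    }

    func (v *exampleValidator) writeLastValidated(gs validator.GoGlobalState, wasmRoots []common.Hash) error {
    	v.lastValidGS = gs // update memory first, as the patch does
    	info := GlobalStateValidatedInfo{
    		GlobalState: gs,
    		WasmRoots:   wasmRoots,
    	}
    	encoded, err := rlp.EncodeToBytes(info)
    	if err != nil {
    		return err
    	}
    	return v.db.Put(lastValidatedInfoKey, encoded)
    }

Passing nil rather than []common.Hash{} at the call sites reads as "no wasm roots known" and behaves the same for the usual range and encode paths, while skipping an allocation.
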
From 902f2ee0a134c72251427ff41c73a877951ee392 Mon Sep 17 00:00:00 2001
From: Tsahi Zidenberg
Date: Mon, 10 Jul 2023 10:25:24 -0600
Subject: [PATCH 63/63] block_validator: add missing error check

---
 staker/block_validator.go | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/staker/block_validator.go b/staker/block_validator.go
index 75f9fa7815..9096324cff 100644
--- a/staker/block_validator.go
+++ b/staker/block_validator.go
@@ -834,7 +834,10 @@ func (v *BlockValidator) UpdateLatestStaked(count arbutil.MessageIndex, globalSt
 		return
 	}
 	v.legacyValidInfo = nil
-	v.writeLastValidated(globalState, nil)
+	err := v.writeLastValidated(globalState, nil)
+	if err != nil {
+		log.Error("error writing last validated", "err", err)
+	}
 	return
 }
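
The final patch closes the gap left by the previous one: UpdateLatestStaked now checks the error that writeLastValidated can return. Because this call only records progress reported by the staker, the failure is logged rather than propagated, which matches how the other call sites in the file treat this helper. In sketch form:

    // Log-and-continue: a failed write of derived progress state should not
    // abort the notification path that produced it.
    if err := v.writeLastValidated(globalState, nil); err != nil {
    	log.Error("error writing last validated", "err", err)
    }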