Skip to content

Commit

Permalink
move opcodes to bytecode
Browse files Browse the repository at this point in the history
  • Loading branch information
rakita committed Sep 23, 2024
1 parent 2c1ab9b commit d0b8ad7
Show file tree
Hide file tree
Showing 34 changed files with 1,322 additions and 1,042 deletions.
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions crates/bytecode/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ all = "warn"
[dependencies]
# revm
primitives.workspace = true
specification.workspace = true

# Jumpmap
bitvec = { version = "1", default-features = false, features = ["alloc"] }
Expand All @@ -34,9 +35,16 @@ serde = { version = "1.0", default-features = false, features = [
"rc",
], optional = true }

# parse opcode feature
paste = { version = "1.0", optional = true }
phf = { version = "0.11", default-features = false, optional = true, features = [
"macros",
] }

[features]
default = ["std"]
std = ["serde?/std", "primitives/std"]
hashbrown = ["primitives/hashbrown"]
serde = ["dep:serde", "primitives/serde", "bitvec/serde"]
serde-json = ["serde"]
# `paste` and `phf` are the optional dependencies declared for the parse
# opcode feature, so enabling `parse` has to turn both of them on.
parse = ["paste", "phf"]
15 changes: 15 additions & 0 deletions crates/bytecode/src/eof.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
mod body;
mod decode_helpers;
mod header;
pub mod printer;
mod types_section;
pub mod verification;

pub use body::EofBody;
pub use header::EofHeader;
pub use types_section::TypesSection;
pub use verification::*;

use core::cmp::min;
use primitives::{b256, bytes, Bytes, B256};
Expand Down Expand Up @@ -48,6 +51,18 @@ impl Default for Eof {
}

impl Eof {
/// Validates this EOF container.
///
/// Thin wrapper that forwards `self` to [`validate_eof`].
///
/// # Errors
///
/// Returns the [`EofError`] reported by `validate_eof`.
pub fn validate(&self) -> Result<(), EofError> {
validate_eof(self)
}

/// Builds an [`Eof`] container from raw `bytes` by forwarding them to
/// [`validate_raw_eof`].
///
/// NOTE(review): the name is spelled `valitate_raw` (presumably meant to be
/// `validate_raw`). It is public API, so a rename needs a deprecation path.
///
/// # Errors
///
/// Returns the [`EofError`] reported by `validate_raw_eof`.
pub fn valitate_raw(bytes: Bytes) -> Result<Eof, EofError> {
validate_raw_eof(bytes)
}

/// Validates this EOF container for an explicitly chosen code type.
///
/// Forwards `self` to [`validate_eof_inner`] with `Some(mode)` as the
/// requested [`CodeType`].
///
/// # Errors
///
/// Returns the [`EofError`] reported by `validate_eof_inner`.
pub fn validate_mode(&self, mode: CodeType) -> Result<(), EofError> {
validate_eof_inner(self, Some(mode))
}

/// Creates a new EOF container from the given body.
pub fn new(body: EofBody) -> Self {
body.into_eof()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
#[cfg(feature = "std")]
pub fn print_eof_code(code: &[u8]) {
use super::*;
use crate::instructions::utility::read_i16;
pub fn print(code: &[u8]) {
use crate::{opcode::*, utils::read_i16};
use primitives::hex;

// We can check validity and jump destinations in one pass.
let mut i = 0;
while i < code.len() {
let op = code[i];
let opcode = &OPCODE_INFO_JUMPTABLE[op as usize];
let opcode = &OPCODE_INFO[op as usize];

let Some(opcode) = opcode else {
println!("Unknown opcode: 0x{:02X}", op);
Expand Down Expand Up @@ -64,6 +63,6 @@ mod test {

#[test]
fn sanity_test() {
print_eof_code(&hex!("6001e200ffff00"));
print(&hex!("6001e200ffff00"));
}
}
Original file line number Diff line number Diff line change
@@ -1,68 +1,13 @@
use crate::{
instructions::utility::{read_i16, read_u16},
opcode, OPCODE_INFO_JUMPTABLE, STACK_LIMIT,
};
use bytecode::{
bitvec::prelude::{bitvec, BitVec, Lsb0},
eof::{Eof, EofDecodeError, TypesSection},
legacy::{JumpTable, LegacyAnalyzedBytecode},
Bytecode,
opcode::{self, OPCODE_INFO},
utils::{read_i16, read_u16},
};
use primitives::{Bytes, MAX_INITCODE_SIZE};
use specification::constantans::STACK_LIMIT;

use core::{convert::identity, mem};
use std::{borrow::Cow, fmt, sync::Arc, vec, vec::Vec};

/// Turns raw legacy bytecode into its analyzed form.
///
/// For [`Bytecode::LegacyRaw`] the code is copied into a new buffer followed by
/// 33 zero bytes, a jump table is built over that padded buffer, and the result
/// is returned as [`Bytecode::LegacyAnalyzed`] together with the unpadded length.
///
/// Every other variant is handed back unchanged.
#[inline]
pub fn to_analysed(bytecode: Bytecode) -> Bytecode {
    let raw = match bytecode {
        Bytecode::LegacyRaw(raw) => raw,
        // Nothing to do for bytecode that is not raw legacy code.
        other => return other,
    };

    let original_len = raw.len();
    let padded_len = original_len + 33;

    // Copy the code and zero-fill the tail up to the padded length.
    let mut padded = Vec::with_capacity(padded_len);
    padded.extend_from_slice(&raw);
    padded.resize(padded_len, 0);
    let padded = Bytes::from(padded);

    let jump_table = analyze(padded.as_ref());
    Bytecode::LegacyAnalyzed(LegacyAnalyzedBytecode::new(
        padded,
        original_len,
        jump_table,
    ))
}

/// Builds the jump map for `code`.
///
/// Walks the code once from start to end. A byte equal to `opcode::JUMPDEST`
/// that is reached as an opcode gets its bit set; for the 32 opcodes starting
/// at `opcode::PUSH1` the immediate bytes are stepped over, so they are never
/// marked. The returned table has one bit per byte of `code`.
fn analyze(code: &[u8]) -> JumpTable {
    let mut jumps: BitVec<u8> = bitvec![u8, Lsb0; 0; code.len()];

    let bounds = code.as_ptr_range();
    let (start, end) = (bounds.start, bounds.end);
    let mut cursor = start;

    while cursor < end {
        // SAFETY: the loop condition guarantees `cursor` points inside `code`.
        let byte = unsafe { *cursor };

        // Number of bytes to advance past the current instruction.
        let step: isize = if byte == opcode::JUMPDEST {
            // SAFETY: jumps are max length of the code
            unsafe { jumps.set_unchecked(cursor.offset_from(start) as usize, true) }
            1
        } else {
            // Distance from PUSH1; values 0..32 identify PUSH1 through PUSH32.
            let push_offset = byte.wrapping_sub(opcode::PUSH1);
            if push_offset < 32 {
                // One opcode byte plus `push_offset + 1` immediate bytes.
                isize::from(push_offset + 2)
            } else {
                1
            }
        };

        // SAFETY: iterator access range is checked in the while loop
        cursor = unsafe { cursor.offset(step) };
    }

    JumpTable(Arc::new(jumps))
}
use std::{borrow::Cow, fmt, vec, vec::Vec};

/// Decodes `raw` into an [`Eof`] container and validates it.
pub fn validate_raw_eof(raw: Bytes) -> Result<Eof, EofError> {
Expand Down Expand Up @@ -507,7 +452,7 @@ pub fn validate_eof_code(
// We can check validity and jump destinations in one pass.
while i < code.len() {
let op = code[i];
let opcode = &OPCODE_INFO_JUMPTABLE[op as usize];
let opcode = &OPCODE_INFO[op as usize];

let Some(opcode) = opcode else {
// err unknown opcode.
Expand Down
69 changes: 4 additions & 65 deletions crates/bytecode/src/legacy.rs
Original file line number Diff line number Diff line change
@@ -1,68 +1,7 @@
mod analyzed;
mod jump_map;
mod raw;

pub use analyzed::LegacyAnalyzedBytecode;
pub use jump_map::JumpTable;

use bitvec::{bitvec, order::Lsb0};
use primitives::Bytes;
use std::sync::Arc;

/// Legacy bytecode together with the result of its jump-destination analysis.
///
/// Stores the padded code bytes, the length the code had before padding and
/// the [`JumpTable`] that belongs to it.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct LegacyAnalyzedBytecode {
/// Bytecode followed by zero-byte padding.
///
/// NOTE(review): this was documented as 32 bytes of padding, but the
/// analysis code in this change appends 33 — confirm the intended figure.
bytecode: Bytes,
/// Length of the bytecode before padding was appended.
original_len: usize,
/// Jump table.
jump_table: JumpTable,
}

impl Default for LegacyAnalyzedBytecode {
    /// Returns analyzed bytecode made of a single static zero byte, an
    /// original length of `0`, and a jump table built from `bitvec![u8, Lsb0; 0]`.
    #[inline]
    fn default() -> Self {
        let bytecode = Bytes::from_static(&[0]);
        let jump_table = JumpTable(Arc::new(bitvec![u8, Lsb0; 0]));
        Self::new(bytecode, 0, jump_table)
    }
}

impl LegacyAnalyzedBytecode {
/// Creates analyzed bytecode from its parts.
///
/// The three arguments are stored as given; nothing is validated here.
/// `original_len` is later used to cut the padding off `bytecode`, so it is
/// expected not to exceed `bytecode.len()`.
pub fn new(bytecode: Bytes, original_len: usize, jump_table: JumpTable) -> Self {
Self {
bytecode,
original_len,
jump_table,
}
}

/// Returns a reference to the stored bytecode, padding included.
///
/// NOTE(review): previously documented as padded with 32 zero bytes, while
/// the analysis code in this change appends 33 — confirm the intended figure.
pub fn bytecode(&self) -> &Bytes {
&self.bytecode
}

/// Returns the length of the bytecode before padding was appended.
pub fn original_len(&self) -> usize {
self.original_len
}

/// Returns the original bytes without padding as an owned [`Bytes`].
///
/// Obtained by slicing the stored bytecode to `..original_len`.
pub fn original_bytes(&self) -> Bytes {
self.bytecode.slice(..self.original_len)
}

/// Returns the original bytes without padding as a borrowed slice.
///
/// # Panics
///
/// Panics if `original_len` is larger than the stored bytecode.
pub fn original_byte_slice(&self) -> &[u8] {
&self.bytecode[..self.original_len]
}

/// Returns the jump table stored with this bytecode.
pub fn jump_table(&self) -> &JumpTable {
&self.jump_table
}
}
pub use raw::LegacyRawBytecode;
65 changes: 65 additions & 0 deletions crates/bytecode/src/legacy/analyzed.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
use super::JumpTable;
use bitvec::{bitvec, order::Lsb0};
use primitives::Bytes;
use std::sync::Arc;

/// Legacy bytecode together with the result of its jump-destination analysis.
///
/// Stores the padded code bytes, the length the code had before padding and
/// the [`JumpTable`] that belongs to it.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct LegacyAnalyzedBytecode {
/// Bytecode followed by zero-byte padding.
///
/// NOTE(review): this was documented as 32 bytes of padding, but the
/// analysis code in this change appends 33 — confirm the intended figure.
bytecode: Bytes,
/// Length of the bytecode before padding was appended.
original_len: usize,
/// Jump table.
jump_table: JumpTable,
}

impl Default for LegacyAnalyzedBytecode {
    /// Returns analyzed bytecode made of a single static zero byte, an
    /// original length of `0`, and a jump table built from `bitvec![u8, Lsb0; 0]`.
    #[inline]
    fn default() -> Self {
        let bytecode = Bytes::from_static(&[0]);
        let jump_table = JumpTable(Arc::new(bitvec![u8, Lsb0; 0]));
        Self::new(bytecode, 0, jump_table)
    }
}

impl LegacyAnalyzedBytecode {
/// Creates analyzed bytecode from its parts.
///
/// The three arguments are stored as given; nothing is validated here.
/// `original_len` is later used to cut the padding off `bytecode`, so it is
/// expected not to exceed `bytecode.len()`.
pub fn new(bytecode: Bytes, original_len: usize, jump_table: JumpTable) -> Self {
Self {
bytecode,
original_len,
jump_table,
}
}

/// Returns a reference to the stored bytecode, padding included.
///
/// NOTE(review): previously documented as padded with 32 zero bytes, while
/// the analysis code in this change appends 33 — confirm the intended figure.
pub fn bytecode(&self) -> &Bytes {
&self.bytecode
}

/// Returns the length of the bytecode before padding was appended.
pub fn original_len(&self) -> usize {
self.original_len
}

/// Returns the original bytes without padding as an owned [`Bytes`].
///
/// Obtained by slicing the stored bytecode to `..original_len`.
pub fn original_bytes(&self) -> Bytes {
self.bytecode.slice(..self.original_len)
}

/// Returns the original bytes without padding as a borrowed slice.
///
/// # Panics
///
/// Panics if `original_len` is larger than the stored bytecode.
pub fn original_byte_slice(&self) -> &[u8] {
&self.bytecode[..self.original_len]
}

/// Returns the jump table stored with this bytecode.
pub fn jump_table(&self) -> &JumpTable {
&self.jump_table
}
}
69 changes: 69 additions & 0 deletions crates/bytecode/src/legacy/raw.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
use super::{JumpTable, LegacyAnalyzedBytecode};
use crate::opcode;
use bitvec::{bitvec, order::Lsb0, vec::BitVec};
use core::ops::Deref;
use primitives::Bytes;
use std::{sync::Arc, vec::Vec};

/// Raw legacy bytecode: a newtype around the code [`Bytes`] for which no jump
/// table has been computed yet.
///
/// Use [`LegacyRawBytecode::into_analyzed`] to obtain the analyzed form.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct LegacyRawBytecode(pub Bytes);

impl LegacyRawBytecode {
    /// Builds the jump table for this bytecode.
    ///
    /// Walks the code once from start to end. A byte equal to
    /// `opcode::JUMPDEST` that is reached as an opcode gets its bit set; for
    /// the 32 opcodes starting at `opcode::PUSH1` the immediate bytes are
    /// stepped over, so they are never marked. The returned table has one bit
    /// per byte of code.
    pub fn analysis(&self) -> JumpTable {
        let code: &[u8] = &self.0;
        let mut jumps: BitVec<u8> = bitvec![u8, Lsb0; 0; code.len()];

        // The code is unpadded here, so a PUSH close to the end steps past
        // `code.len()`. An index can do that safely; a raw pointer moved with
        // `offset` must never leave the allocation, which is why this walk is
        // index-based.
        let mut pc = 0usize;
        while pc < code.len() {
            let op = code[pc];
            if op == opcode::JUMPDEST {
                // SAFETY: `pc < code.len()` and `jumps` holds exactly
                // `code.len()` bits.
                unsafe { jumps.set_unchecked(pc, true) };
                pc += 1;
            } else {
                // Distance from PUSH1; values 0..32 identify PUSH1 through PUSH32.
                let push_offset = op.wrapping_sub(opcode::PUSH1);
                if push_offset < 32 {
                    // One opcode byte plus `push_offset + 1` immediate bytes.
                    pc += usize::from(push_offset) + 2;
                } else {
                    pc += 1;
                }
            }
        }

        JumpTable(Arc::new(jumps))
    }

    /// Consumes the raw bytecode and returns its analyzed form.
    ///
    /// The jump table is computed over the unpadded code. The stored bytes
    /// are a copy of the code followed by 33 zero bytes, and the unpadded
    /// length is recorded alongside them.
    pub fn into_analyzed(self) -> LegacyAnalyzedBytecode {
        let jump_table = self.analysis();
        let len = self.0.len();
        let mut padded_bytecode = Vec::with_capacity(len + 33);
        padded_bytecode.extend_from_slice(&self.0);
        padded_bytecode.resize(len + 33, 0);
        LegacyAnalyzedBytecode::new(padded_bytecode.into(), len, jump_table)
    }
}

/// Wraps already-owned [`Bytes`] as raw legacy bytecode without touching them.
impl From<Bytes> for LegacyRawBytecode {
fn from(bytes: Bytes) -> Self {
Self(bytes)
}
}

/// Builds raw legacy bytecode from a fixed-size byte array of any length `N`,
/// converting the array into [`Bytes`] first.
impl<const N: usize> From<[u8; N]> for LegacyRawBytecode {
fn from(bytes: [u8; N]) -> Self {
Self(bytes.into())
}
}

/// Gives read-only access to the wrapped [`Bytes`], so their methods can be
/// called directly on a `LegacyRawBytecode`.
impl Deref for LegacyRawBytecode {
type Target = Bytes;

// Borrows the inner `Bytes`; no copy is made.
fn deref(&self) -> &Self::Target {
&self.0
}
}
Loading

0 comments on commit d0b8ad7

Please sign in to comment.