diff --git a/bril-rs/Cargo.toml b/bril-rs/Cargo.toml index e9a0d5948..950244a4a 100644 --- a/bril-rs/Cargo.toml +++ b/bril-rs/Cargo.toml @@ -17,7 +17,6 @@ keywords = ["compiler", "bril", "parser", "data-structures", "language"] thiserror = "1.0" serde_json = "1.0" serde = { version = "1.0", features = ["derive"] } -encode_unicode = { version = "1.0.0", optional = true} [features] float = [] @@ -26,7 +25,7 @@ ssa = [] speculate = [] position = [] import = [] -char = ["dep:encode_unicode"] +char = [] [[example]] name = "bril2txt" diff --git a/bril-rs/bril2json/Cargo.toml b/bril-rs/bril2json/Cargo.toml index 9387bd5d7..b9212d8a8 100644 --- a/bril-rs/bril2json/Cargo.toml +++ b/bril-rs/bril2json/Cargo.toml @@ -15,7 +15,6 @@ keywords = ["compiler", "bril", "parser", "data-structures", "language"] [dependencies] clap = { version = "4.3", features = ["derive"] } -encode_unicode = "1.0.0" lalrpop-util = { version = "0.20", features = ["lexer"] } regex = "1.8" diff --git a/bril-rs/bril2json/src/bril_grammar.lalrpop b/bril-rs/bril2json/src/bril_grammar.lalrpop index 8c41e2514..742ce25df 100644 --- a/bril-rs/bril2json/src/bril_grammar.lalrpop +++ b/bril-rs/bril2json/src/bril_grammar.lalrpop @@ -201,7 +201,7 @@ Bool: bool = { Float: f64 = => f64::from_str(f).unwrap(); -Char: u16 = => {let c = c.trim_matches('\''); escape_control_chars(c).unwrap_or_else(|| encode_unicode::Utf16Char::from_str_start(c).unwrap().0.to_tuple().0)}; +Char: char = => {let c = c.trim_matches('\''); escape_control_chars(c).unwrap()}; // https://lalrpop.github.io/lalrpop/tutorial/006_macros.html Comma: Vec = { // (1) diff --git a/bril-rs/bril2json/src/lib.rs b/bril-rs/bril2json/src/lib.rs index 29439927d..d921841d6 100644 --- a/bril-rs/bril2json/src/lib.rs +++ b/bril-rs/bril2json/src/lib.rs @@ -12,16 +12,19 @@ use std::fs::File; use bril_rs::{AbstractProgram, ColRow, Position}; -fn escape_control_chars(s: &str) -> Option { +/// A helper function for processing the accepted Bril characters from their text 
representation +#[must_use] +pub fn escape_control_chars(s: &str) -> Option { match s { - "\\0" => Some(0), - "\\a" => Some(7), - "\\b" => Some(8), - "\\t" => Some(9), - "\\n" => Some(10), - "\\v" => Some(11), - "\\f" => Some(12), - "\\r" => Some(13), + "\\0" => Some('\u{0000}'), + "\\a" => Some('\u{0007}'), + "\\b" => Some('\u{0008}'), + "\\t" => Some('\u{0009}'), + "\\n" => Some('\u{000A}'), + "\\v" => Some('\u{000B}'), + "\\f" => Some('\u{000C}'), + "\\r" => Some('\u{000D}'), + s if s.chars().count() == 1 => s.chars().next(), _ => None, } } diff --git a/bril-rs/src/program.rs b/bril-rs/src/program.rs index e188f7ee1..ef9acb9ab 100644 --- a/bril-rs/src/program.rs +++ b/bril-rs/src/program.rs @@ -593,36 +593,7 @@ pub enum Literal { Float(f64), - /// UTF-16 Characters + /// Unicode Characters #[cfg(feature = "char")] - #[serde(deserialize_with = "deserialize_bmp")] - #[serde(serialize_with = "serialize_bmp")] - Char(u16), -} - -#[cfg(feature = "char")] -fn deserialize_bmp<'de, D>(deserializer: D) -> Result -where - D: serde::Deserializer<'de>, -{ - let s = String::deserialize(deserializer)?; - - if s.len() != 1 { - return Err(serde::de::Error::custom("invalid UTF-16 character")); - } - - let c = encode_unicode::Utf16Char::from_str_start(&s) - .map_err(|_| serde::de::Error::custom("invalid UTF-16 character")) - .map(|c| c.0.to_tuple().0)?; - Ok(c) -} - -#[cfg(feature = "char")] -#[allow(clippy::trivially_copy_pass_by_ref)] // to match serde's signature -fn serialize_bmp(c: &u16, serializer: S) -> Result -where - S: serde::Serializer, -{ - let c = encode_unicode::Utf16Char::from_bmp(*c).unwrap(); - serializer.serialize_str(&c.to_string()) + Char(char), } impl Display for Literal { @@ -633,15 +604,26 @@ impl Display for Literal { #[cfg(feature = "float")] Self::Float(x) => write!(f, "{x}"), #[cfg(feature = "char")] - Self::Char(c) => write!( - f, - "\'{}\'", - encode_unicode::Utf16Char::from_bmp(*c).unwrap() - ), + Self::Char(c) => write!(f, "\'{}\'", escape_char(*c)), } } } +#[cfg(feature = "char")] +fn 
escape_char(c: char) -> String { + match c { + '\u{0000}' => "\\0".to_string(), + '\u{0007}' => "\\a".to_string(), + '\u{0008}' => "\\b".to_string(), + '\u{0009}' => "\\t".to_string(), + '\u{000A}' => "\\n".to_string(), + '\u{000B}' => "\\v".to_string(), + '\u{000C}' => "\\f".to_string(), + '\u{000D}' => "\\r".to_string(), + c => c.to_string(), + } +} + impl Literal { /// A helper function to get the type of literal values #[must_use] diff --git a/brilirs/Cargo.toml b/brilirs/Cargo.toml index 5665b1433..f16e3341a 100644 --- a/brilirs/Cargo.toml +++ b/brilirs/Cargo.toml @@ -13,16 +13,15 @@ keywords = ["compiler", "bril", "interpreter", "data-structures", "language"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [build-dependencies] -clap = { version = "4.2", features = ["derive"] } -clap_complete= { version = "4.2", optional = true } +clap = { version = "4.3", features = ["derive"] } +clap_complete= { version = "4.3", optional = true } [dependencies] thiserror = "1.0" -clap = { version = "4.2", features = ["derive"] } +clap = { version = "4.3", features = ["derive"] } fxhash = "0.2" mimalloc = "0.1" itoa = "1.0" -encode_unicode = "1.0.0" [dependencies.bril-rs] version = "0.1.0" diff --git a/brilirs/src/interp.rs b/brilirs/src/interp.rs index 8429aa01a..17c5a5596 100644 --- a/brilirs/src/interp.rs +++ b/brilirs/src/interp.rs @@ -1,8 +1,8 @@ use crate::basic_block::{BBFunction, BBProgram, BasicBlock}; use crate::error::{InterpError, PositionalInterpError}; +use bril2json::escape_control_chars; use bril_rs::Instruction; -use encode_unicode::Utf16Char; use fxhash::FxHashMap; use mimalloc::MiMalloc; @@ -170,7 +170,7 @@ enum Value { Int(i64), Bool(bool), Float(f64), - Char(u16), + Char(char), Pointer(Pointer), #[default] Uninitialized, @@ -199,7 +199,7 @@ impl fmt::Display for Value { Self::Float(v) if v.is_infinite() && v.is_sign_positive() => write!(f, "Infinity"), Self::Float(v) if v.is_infinite() && 
v.is_sign_negative() => write!(f, "-Infinity"), Self::Float(v) => write!(f, "{v:.17}"), - Self::Char(c) => write!(f, "{}", Utf16Char::from_bmp(*c).unwrap()), + Self::Char(c) => write!(f, "{c}"), Self::Pointer(p) => write!(f, "{p:?}"), Self::Uninitialized => unreachable!(), } @@ -209,12 +209,15 @@ impl fmt::Display for Value { fn optimized_val_output(out: &mut T, val: &Value) -> Result<(), std::io::Error> { match val { Value::Int(i) => out.write_all(itoa::Buffer::new().format(*i).as_bytes()), - Value::Bool(b) => out.write_all(b.to_string().as_bytes()), + Value::Bool(b) => out.write_all(if *b { b"true" } else { b"false" }), Value::Float(f) if f.is_infinite() && f.is_sign_positive() => out.write_all(b"Infinity"), Value::Float(f) if f.is_infinite() && f.is_sign_negative() => out.write_all(b"-Infinity"), Value::Float(f) if f.is_nan() => out.write_all(b"NaN"), Value::Float(f) => out.write_all(format!("{f:.17}").as_bytes()), - Value::Char(c) => out.write_all(Utf16Char::from_bmp(*c).unwrap().to_string().as_bytes()), + Value::Char(c) => { + let buf = &mut [0_u8; 4]; // a char needs up to 4 bytes in UTF-8; encode_utf8 panics on a smaller buffer + out.write_all(c.encode_utf8(buf).as_bytes()) + } Value::Pointer(p) => out.write_all(format!("{p:?}").as_bytes()), Value::Uninitialized => unreachable!(), } @@ -272,7 +275,7 @@ impl From<&Value> for f64 { } } -impl From<&Value> for u16 { +impl From<&Value> for char { fn from(value: &Value) -> Self { if let Value::Char(c) = value { *c @@ -437,42 +440,43 @@ fn execute_value_op( state.env.set(dest, Value::Bool(arg0 >= arg1)); } Ceq => { - let arg0 = get_arg::(&state.env, 0, args); - let arg1 = get_arg::(&state.env, 1, args); + let arg0 = get_arg::(&state.env, 0, args); + let arg1 = get_arg::(&state.env, 1, args); state.env.set(dest, Value::Bool(arg0 == arg1)); } Clt => { - let arg0 = get_arg::(&state.env, 0, args); - let arg1 = get_arg::(&state.env, 1, args); + let arg0 = get_arg::(&state.env, 0, args); + let arg1 = get_arg::(&state.env, 1, args); state.env.set(dest, Value::Bool(arg0 < arg1)); } Cgt => { - let 
arg0 = get_arg::(&state.env, 0, args); - let arg1 = get_arg::(&state.env, 1, args); + let arg0 = get_arg::(&state.env, 0, args); + let arg1 = get_arg::(&state.env, 1, args); state.env.set(dest, Value::Bool(arg0 > arg1)); } Cle => { - let arg0 = get_arg::(&state.env, 0, args); - let arg1 = get_arg::(&state.env, 1, args); + let arg0 = get_arg::(&state.env, 0, args); + let arg1 = get_arg::(&state.env, 1, args); state.env.set(dest, Value::Bool(arg0 <= arg1)); } Cge => { - let arg0 = get_arg::(&state.env, 0, args); - let arg1 = get_arg::(&state.env, 1, args); + let arg0 = get_arg::(&state.env, 0, args); + let arg1 = get_arg::(&state.env, 1, args); state.env.set(dest, Value::Bool(arg0 >= arg1)); } Char2int => { - let arg0 = get_arg::(&state.env, 0, args); - state.env.set(dest, Value::Int(i64::from(arg0))); + let arg0 = get_arg::(&state.env, 0, args); + state.env.set(dest, Value::Int(u32::from(arg0).into())); } Int2char => { let arg0 = get_arg::(&state.env, 0, args); - let arg0_u16 = u16::try_from(arg0).map_err(|_| InterpError::ToCharError(arg0))?; - - let _arg0_char = Utf16Char::from_bmp(arg0_u16).map_err(|_| InterpError::ToCharError(arg0))?; + let arg0_char = u32::try_from(arg0) + .ok() + .and_then(char::from_u32) + .ok_or(InterpError::ToCharError(arg0))?; - state.env.set(dest, Value::Char(arg0_u16)); + state.env.set(dest, Value::Char(arg0_char)); } Call => { let callee_func = state.prog.get(funcs[0]).unwrap(); @@ -688,20 +692,6 @@ fn execute<'a, T: std::io::Write>( } } -fn escape_control_chars(s: &str) -> Result { - match s { - "\\0" => Ok(0), - "\\a" => Ok(7), - "\\b" => Ok(8), - "\\t" => Ok(9), - "\\n" => Ok(10), - "\\v" => Ok(11), - "\\f" => Ok(12), - "\\r" => Ok(13), - _ => Utf16Char::from_str_start(s).map(|c| *c.0.to_array().first().unwrap()), - } -} - fn parse_args( mut env: Environment, args: &[bril_rs::Argument], @@ -757,7 +747,7 @@ fn parse_args( bril_rs::Type::Pointer(..) 
=> unreachable!(), bril_rs::Type::Char => escape_control_chars(inputs.get(index).unwrap().as_ref()) .map_or_else( - |_| Err(InterpError::NotOneChar), + || Err(InterpError::NotOneChar), |c| { env.set(*arg_as_num, Value::Char(c)); Ok(())