Skip to content

Commit

Permalink
Move to char internal representation
Browse files Browse the repository at this point in the history
  • Loading branch information
Pat-Lafon committed Jun 29, 2023
1 parent e0895fb commit 7407d9d
Show file tree
Hide file tree
Showing 7 changed files with 61 additions and 89 deletions.
3 changes: 1 addition & 2 deletions bril-rs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ keywords = ["compiler", "bril", "parser", "data-structures", "language"]
thiserror = "1.0"
serde_json = "1.0"
serde = { version = "1.0", features = ["derive"] }
encode_unicode = { version = "1.0.0", optional = true}

[features]
float = []
Expand All @@ -26,7 +25,7 @@ ssa = []
speculate = []
position = []
import = []
char = ["dep:encode_unicode"]
char = []

[[example]]
name = "bril2txt"
Expand Down
1 change: 0 additions & 1 deletion bril-rs/bril2json/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ keywords = ["compiler", "bril", "parser", "data-structures", "language"]

[dependencies]
clap = { version = "4.3", features = ["derive"] }
encode_unicode = "1.0.0"
lalrpop-util = { version = "0.20", features = ["lexer"] }
regex = "1.8"

Expand Down
2 changes: 1 addition & 1 deletion bril-rs/bril2json/src/bril_grammar.lalrpop
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ Bool: bool = {

Float: f64 = <f:FLOAT_TOKEN> => f64::from_str(f).unwrap();

Char: u16 = <c:CHAR_TOKEN> => {let c = c.trim_matches('\''); escape_control_chars(c).unwrap_or_else(|| encode_unicode::Utf16Char::from_str_start(c).unwrap().0.to_tuple().0)};
Char: char = <c:CHAR_TOKEN> => {let c = c.trim_matches('\''); escape_control_chars(c).unwrap()};

// https://lalrpop.github.io/lalrpop/tutorial/006_macros.html
Comma<T>: Vec<T> = { // (1)
Expand Down
21 changes: 12 additions & 9 deletions bril-rs/bril2json/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,19 @@ use std::fs::File;

use bril_rs::{AbstractProgram, ColRow, Position};

/// Parses the text of a Bril character literal (without its surrounding quotes) into a `char`.
///
/// The two-character escape sequences `\0`, `\a`, `\b`, `\t`, `\n`, `\v`, `\f` and `\r` are
/// mapped to the corresponding control characters. Any other input must consist of exactly one
/// Unicode scalar value, which is returned unchanged.
///
/// Returns `None` for the empty string, for unrecognised escape sequences, and for any other
/// input that holds more than one character.
#[must_use]
pub fn escape_control_chars(s: &str) -> Option<char> {
    match s {
        "\\0" => Some('\u{0000}'),
        "\\a" => Some('\u{0007}'),
        "\\b" => Some('\u{0008}'),
        "\\t" => Some('\u{0009}'),
        "\\n" => Some('\u{000A}'),
        "\\v" => Some('\u{000B}'),
        "\\f" => Some('\u{000C}'),
        "\\r" => Some('\u{000D}'),
        _ => {
            // Count characters rather than bytes: `str::len` is the UTF-8 byte length, so a
            // length check would reject every non-ASCII character even though it is one `char`.
            let mut chars = s.chars();
            match (chars.next(), chars.next()) {
                (Some(c), None) => Some(c),
                _ => None,
            }
        }
    }
}
Expand Down
52 changes: 17 additions & 35 deletions bril-rs/src/program.rs
Original file line number Diff line number Diff line change
Expand Up @@ -593,36 +593,7 @@ pub enum Literal {
Float(f64),
/// Unicode characters (each value is a single Unicode scalar value)
#[cfg(feature = "char")]
#[serde(deserialize_with = "deserialize_bmp")]
#[serde(serialize_with = "serialize_bmp")]
Char(u16),
}

#[cfg(feature = "char")]
fn deserialize_bmp<'de, D>(deserializer: D) -> Result<u16, D::Error>
where
D: serde::Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;

if s.len() != 1 {
return Err(serde::de::Error::custom("invalid UTF-16 character"));
}

let c = encode_unicode::Utf16Char::from_str_start(&s)
.map_err(|_| serde::de::Error::custom("invalid UTF-16 character"))
.map(|c| c.0.to_tuple().0)?;
Ok(c)
}

#[cfg(feature = "char")]
#[allow(clippy::trivially_copy_pass_by_ref)] // to match serde's signature
fn serialize_bmp<S>(c: &u16, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
let c = encode_unicode::Utf16Char::from_bmp(*c).unwrap();
serializer.serialize_str(&c.to_string())
Char(char),
}

impl Display for Literal {
Expand All @@ -633,15 +604,26 @@ impl Display for Literal {
#[cfg(feature = "float")]
Self::Float(x) => write!(f, "{x}"),
#[cfg(feature = "char")]
Self::Char(c) => write!(
f,
"\'{}\'",
encode_unicode::Utf16Char::from_bmp(*c).unwrap()
),
Self::Char(c) => write!(f, "\'{}\'", escape_char(*c)),
}
}
}

#[cfg(feature = "char")]
/// Renders a character the way it is written inside a Bril character literal.
///
/// The control characters NUL, BEL, BS, HT, LF, VT, FF and CR become their two-character
/// backslash escapes; every other character is returned as a one-character string.
fn escape_char(c: char) -> String {
    let escaped = match c {
        '\0' => "\\0",
        '\u{07}' => "\\a",
        '\u{08}' => "\\b",
        '\t' => "\\t",
        '\n' => "\\n",
        '\u{0B}' => "\\v",
        '\u{0C}' => "\\f",
        '\r' => "\\r",
        // Anything without a dedicated escape is emitted verbatim.
        other => return other.to_string(),
    };
    escaped.to_owned()
}

impl Literal {
/// A helper function to get the type of literal values
#[must_use]
Expand Down
7 changes: 3 additions & 4 deletions brilirs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,15 @@ keywords = ["compiler", "bril", "interpreter", "data-structures", "language"]

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[build-dependencies]
clap = { version = "4.2", features = ["derive"] }
clap_complete= { version = "4.2", optional = true }
clap = { version = "4.3", features = ["derive"] }
clap_complete= { version = "4.3", optional = true }

[dependencies]
thiserror = "1.0"
clap = { version = "4.2", features = ["derive"] }
clap = { version = "4.3", features = ["derive"] }
fxhash = "0.2"
mimalloc = "0.1"
itoa = "1.0"
encode_unicode = "1.0.0"

[dependencies.bril-rs]
version = "0.1.0"
Expand Down
64 changes: 27 additions & 37 deletions brilirs/src/interp.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use crate::basic_block::{BBFunction, BBProgram, BasicBlock};
use crate::error::{InterpError, PositionalInterpError};
use bril2json::escape_control_chars;
use bril_rs::Instruction;

use encode_unicode::Utf16Char;
use fxhash::FxHashMap;

use mimalloc::MiMalloc;
Expand Down Expand Up @@ -170,7 +170,7 @@ enum Value {
Int(i64),
Bool(bool),
Float(f64),
Char(u16),
Char(char),
Pointer(Pointer),
#[default]
Uninitialized,
Expand Down Expand Up @@ -199,7 +199,7 @@ impl fmt::Display for Value {
Self::Float(v) if v.is_infinite() && v.is_sign_positive() => write!(f, "Infinity"),
Self::Float(v) if v.is_infinite() && v.is_sign_negative() => write!(f, "-Infinity"),
Self::Float(v) => write!(f, "{v:.17}"),
Self::Char(c) => write!(f, "{}", Utf16Char::from_bmp(*c).unwrap()),
Self::Char(c) => write!(f, "{c}"),
Self::Pointer(p) => write!(f, "{p:?}"),
Self::Uninitialized => unreachable!(),
}
Expand All @@ -209,12 +209,15 @@ impl fmt::Display for Value {
fn optimized_val_output<T: std::io::Write>(out: &mut T, val: &Value) -> Result<(), std::io::Error> {
match val {
Value::Int(i) => out.write_all(itoa::Buffer::new().format(*i).as_bytes()),
Value::Bool(b) => out.write_all(b.to_string().as_bytes()),
Value::Bool(b) => out.write_all(if *b { b"true" } else { b"false" }),
Value::Float(f) if f.is_infinite() && f.is_sign_positive() => out.write_all(b"Infinity"),
Value::Float(f) if f.is_infinite() && f.is_sign_negative() => out.write_all(b"-Infinity"),
Value::Float(f) if f.is_nan() => out.write_all(b"NaN"),
Value::Float(f) => out.write_all(format!("{f:.17}").as_bytes()),
Value::Char(c) => out.write_all(Utf16Char::from_bmp(*c).unwrap().to_string().as_bytes()),
Value::Char(c) => {
let buf = &mut [0_u8; 2];
out.write_all(c.encode_utf8(buf).as_bytes())
}
Value::Pointer(p) => out.write_all(format!("{p:?}").as_bytes()),
Value::Uninitialized => unreachable!(),
}
Expand Down Expand Up @@ -272,7 +275,7 @@ impl From<&Value> for f64 {
}
}

impl From<&Value> for u16 {
impl From<&Value> for char {
fn from(value: &Value) -> Self {
if let Value::Char(c) = value {
*c
Expand Down Expand Up @@ -437,42 +440,43 @@ fn execute_value_op<T: std::io::Write>(
state.env.set(dest, Value::Bool(arg0 >= arg1));
}
Ceq => {
let arg0 = get_arg::<u16>(&state.env, 0, args);
let arg1 = get_arg::<u16>(&state.env, 1, args);
let arg0 = get_arg::<char>(&state.env, 0, args);
let arg1 = get_arg::<char>(&state.env, 1, args);
state.env.set(dest, Value::Bool(arg0 == arg1));
}
Clt => {
let arg0 = get_arg::<u16>(&state.env, 0, args);
let arg1 = get_arg::<u16>(&state.env, 1, args);
let arg0 = get_arg::<char>(&state.env, 0, args);
let arg1 = get_arg::<char>(&state.env, 1, args);
state.env.set(dest, Value::Bool(arg0 < arg1));
}
Cgt => {
let arg0 = get_arg::<u16>(&state.env, 0, args);
let arg1 = get_arg::<u16>(&state.env, 1, args);
let arg0 = get_arg::<char>(&state.env, 0, args);
let arg1 = get_arg::<char>(&state.env, 1, args);
state.env.set(dest, Value::Bool(arg0 > arg1));
}
Cle => {
let arg0 = get_arg::<u16>(&state.env, 0, args);
let arg1 = get_arg::<u16>(&state.env, 1, args);
let arg0 = get_arg::<char>(&state.env, 0, args);
let arg1 = get_arg::<char>(&state.env, 1, args);
state.env.set(dest, Value::Bool(arg0 <= arg1));
}
Cge => {
let arg0 = get_arg::<u16>(&state.env, 0, args);
let arg1 = get_arg::<u16>(&state.env, 1, args);
let arg0 = get_arg::<char>(&state.env, 0, args);
let arg1 = get_arg::<char>(&state.env, 1, args);
state.env.set(dest, Value::Bool(arg0 >= arg1));
}
Char2int => {
let arg0 = get_arg::<u16>(&state.env, 0, args);
state.env.set(dest, Value::Int(i64::from(arg0)));
let arg0 = get_arg::<char>(&state.env, 0, args);
state.env.set(dest, Value::Int(u32::from(arg0).into()));
}
Int2char => {
let arg0 = get_arg::<i64>(&state.env, 0, args);

let arg0_u16 = u16::try_from(arg0).map_err(|_| InterpError::ToCharError(arg0))?;

let _arg0_char = Utf16Char::from_bmp(arg0_u16).map_err(|_| InterpError::ToCharError(arg0))?;
let arg0_char = u32::try_from(arg0)
.ok()
.and_then(char::from_u32)
.ok_or(InterpError::ToCharError(arg0))?;

state.env.set(dest, Value::Char(arg0_u16));
state.env.set(dest, Value::Char(arg0_char));
}
Call => {
let callee_func = state.prog.get(funcs[0]).unwrap();
Expand Down Expand Up @@ -688,20 +692,6 @@ fn execute<'a, T: std::io::Write>(
}
}

/// Converts the text form of a character argument into a `u16`.
///
/// The escape sequences `\0`, `\a`, `\b`, `\t`, `\n`, `\v`, `\f` and `\r` map to the values
/// 0, 7, 8, 9, 10, 11, 12 and 13. Any other input is handed to `Utf16Char::from_str_start`,
/// and the first element of the resulting array is returned.
///
/// # Errors
///
/// Returns whatever error `Utf16Char::from_str_start` reports; judging by the error type this
/// is the empty-string case (to be confirmed against the `encode_unicode` documentation).
fn escape_control_chars(s: &str) -> Result<u16, encode_unicode::error::EmptyStrError> {
    let code_unit = match s {
        "\\0" => 0,
        "\\a" => 7,
        "\\b" => 8,
        "\\t" => 9,
        "\\n" => 10,
        "\\v" => 11,
        "\\f" => 12,
        "\\r" => 13,
        // Not a recognised escape sequence: fall back to decoding the text itself.
        _ => return Utf16Char::from_str_start(s).map(|c| *c.0.to_array().first().unwrap()),
    };
    Ok(code_unit)
}

fn parse_args(
mut env: Environment,
args: &[bril_rs::Argument],
Expand Down Expand Up @@ -757,7 +747,7 @@ fn parse_args(
bril_rs::Type::Pointer(..) => unreachable!(),
bril_rs::Type::Char => escape_control_chars(inputs.get(index).unwrap().as_ref())
.map_or_else(
|_| Err(InterpError::NotOneChar),
|| Err(InterpError::NotOneChar),
|c| {
env.set(*arg_as_num, Value::Char(c));
Ok(())
Expand Down

0 comments on commit 7407d9d

Please sign in to comment.