Try out winnow #86

Closed
wants to merge 13 commits
1 change: 1 addition & 0 deletions Cargo.toml
@@ -29,6 +29,7 @@ rustc-hash = "1.1.0"
unicode-ident = "1"
once_cell = "1.17.0"
indexmap = "2"
winnow = "0.6.7"

[dev-dependencies]
wgpu = { version = "0.19.0", features = ["naga-ir"] }
235 changes: 159 additions & 76 deletions src/compose/comment_strip_iter.rs
@@ -1,90 +1,178 @@
use std::{borrow::Cow, str::Lines};
use std::{borrow::Cow, ops::Range};

use winnow::{
ascii::till_line_ending,
combinator::{cut_err, opt},
error::StrContext,
token::any,
Located, PResult, Parser,
};

struct SourceCode {
/** Sorted pieces of the source code without any gaps */
parts: Vec<SourceCodePart>,
}

enum SourceCodePart {
Text(Range<usize>),
SingleLineComment(SingleLineComment),
MultiLineComment(MultiLineComment),
}

impl SourceCodePart {
fn span(&self) -> Range<usize> {
match self {
SourceCodePart::Text(span) => span.clone(),
SourceCodePart::SingleLineComment(comment) => comment.span.clone(),
SourceCodePart::MultiLineComment(comment) => comment.span.clone(),
}
}
}

use regex::Regex;
pub struct SingleLineComment {
pub span: Range<usize>,
}
pub struct MultiLineComment {
pub span: Range<usize>,
}

static RE_COMMENT: once_cell::sync::Lazy<Regex> =
once_cell::sync::Lazy::new(|| Regex::new(r"(//|/\*|\*/)").unwrap());
fn parse_source(input: &mut Located<&str>) -> PResult<SourceCode> {
let mut parts = Vec::new();
loop {
if input.is_empty() {
break;
}
if let Some(part) = opt(single_line_comment).parse_next(input)? {
parts.push(SourceCodePart::SingleLineComment(part));
} else if let Some(part) = opt(multi_line_comment).parse_next(input)? {
parts.push(SourceCodePart::MultiLineComment(part));
} else {
let text_span = any.span().parse_next(input)?;
if let Some(SourceCodePart::Text(last_span)) = parts.last_mut() {
last_span.end = text_span.end;
} else {
parts.push(SourceCodePart::Text(text_span));
}
}
}
Ok(SourceCode { parts })
}

fn single_line_comment(input: &mut Located<&str>) -> PResult<SingleLineComment> {
let start_span = "//".span().parse_next(input)?;
let text_span = till_line_ending.span().parse_next(input)?;
Ok(SingleLineComment {
span: start_span.start..text_span.end,
})
}
fn multi_line_comment(input: &mut Located<&str>) -> PResult<MultiLineComment> {
let start_span = "/*".span().parse_next(input)?;
loop {
if let Some(end_span) = opt("*/".span()).parse_next(input)? {
return Ok(MultiLineComment {
span: start_span.start..end_span.end,
});
} else if let Some(_) = opt(multi_line_comment).parse_next(input)? {
// We found a nested comment, skip it
} else {
// Skip a single character
let _ = cut_err(any)
.context(StrContext::Label("multiline comment"))
.parse_next(input)?;
}
}
}

Contributor Author

Up until here is the actual parser code. Everything below is a dance to keep the existing behaviour mostly intact, while also prepping to replace more code.
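For context, a minimal sketch of how the reworked API introduced below is meant to be called (illustrative only, not part of the diff; it assumes the `replace_comments` free function defined further down is in scope):

```rust
fn demo(source: &str) {
    // Each item pairs the comment-stripped line with the original line.
    // Both keep their trailing '\n', so lengths match and spans stay aligned.
    for (stripped, original) in replace_comments(source) {
        assert_eq!(stripped.len(), original.len());
        println!("{stripped:?} <- {original:?}");
    }
}
```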

pub struct CommentReplaceIter<'a> {
lines: &'a mut Lines<'a>,
block_depth: usize,
text: &'a str,
text_index: usize,
parsed: SourceCode,
parsed_index: usize,
}

fn clamp_range(range: Range<usize>, min: usize, max: usize) -> Range<usize> {
range.start.clamp(min, max)..range.end.clamp(min, max)
}

impl<'a> Iterator for CommentReplaceIter<'a> {
type Item = Cow<'a, str>;
type Item = (Cow<'a, str>, &'a str);

fn next(&mut self) -> Option<Self::Item> {
let line_in = self.lines.next()?;
let mut markers = RE_COMMENT
.captures_iter(line_in)
.map(|cap| cap.get(0).unwrap())
.peekable();

// fast path
if self.block_depth == 0 && markers.peek().is_none() {
return Some(Cow::Borrowed(line_in));
if self.text_index >= self.text.len() {
return None;
}

let mut output = String::new();
let mut section_start = 0;

loop {
let mut next_marker = markers.next();
let mut section_end = next_marker.map(|m| m.start()).unwrap_or(line_in.len());

// skip partial tokens
while next_marker.is_some() && section_start > section_end {
next_marker = markers.next();
section_end = next_marker.map(|m| m.start()).unwrap_or(line_in.len());
let line_start = self.text_index;
let line_end = self.text[line_start..]
.find('\n') // TODO: Handle \r\n
.map(|i| line_start + i + 1)
.unwrap_or_else(|| self.text.len());
self.text_index = line_end;

let mut parts = Vec::new();
for (i, parsed_part) in self.parsed.parts.iter().enumerate().skip(self.parsed_index) {
let span = parsed_part.span();
if span.start >= line_end {
break;
}
if span.end <= line_start {
self.parsed_index = i + 1;
continue;
}
parts.push((parsed_part, clamp_range(span, line_start, line_end)));
}

if self.block_depth == 0 {
output.push_str(&line_in[section_start..section_end]);
} else {
output.extend(std::iter::repeat(' ').take(section_end - section_start));
assert!(parts.len() > 0);

// Fast path
if parts.len() == 1 {
match parts.into_iter().next().unwrap() {
(SourceCodePart::Text(_), span) => {
return Some((
Cow::Borrowed(&self.text[span]),
&self.text[line_start..line_end],
));
}
(
SourceCodePart::SingleLineComment(_) | SourceCodePart::MultiLineComment(_),
span,
) => {
let spaces = " ".repeat(span.len());
return Some((Cow::Owned(spaces), &self.text[line_start..line_end]));
}
}
}

match next_marker {
None => return Some(Cow::Owned(output)),
Some(marker) => {
match marker.as_str() {
"//" => {
// the specs (https://www.w3.org/TR/WGSL/#comment, https://registry.khronos.org/OpenGL/specs/gl/GLSLangSpec.4.60.pdf @ 3.4) state that
// whichever comment-type starts first should cancel parsing of the other type
if self.block_depth == 0 {
output.extend(
std::iter::repeat(' ').take(line_in.len() - marker.start()),
);
return Some(Cow::Owned(output));
}
}
"/*" => {
self.block_depth += 1;
}
"*/" => {
self.block_depth = self.block_depth.saturating_sub(1);
}
_ => unreachable!(),
}
output.extend(std::iter::repeat(' ').take(marker.as_str().len()));
section_start = marker.end();
let mut output = String::new();
let mut last_end = line_start;
for (part, span) in parts.into_iter() {
output.push_str(&self.text[last_end..span.start]);
last_end = span.end;
match part {
SourceCodePart::Text(_) => {
output.push_str(&self.text[span]);
}
SourceCodePart::SingleLineComment(_) | SourceCodePart::MultiLineComment(_) => {
output.extend(std::iter::repeat(' ').take(span.len()));
}
}
}
}
}

pub trait CommentReplaceExt<'a> {
/// replace WGSL and GLSL comments with whitespace characters
fn replace_comments(&'a mut self) -> CommentReplaceIter;
assert!(last_end == line_end);
Some((Cow::Owned(output), &self.text[line_start..line_end]))
}
}

impl<'a> CommentReplaceExt<'a> for Lines<'a> {
fn replace_comments(&'a mut self) -> CommentReplaceIter {
CommentReplaceIter {
lines: self,
block_depth: 0,
}
/// Gives you an iterator that replaces comments in the input text with spaces.
/// The iterator will yield the same lines as the input text, but with comments replaced.
/// Lines will include the newline character at the end!
pub fn replace_comments(input: &str) -> CommentReplaceIter {
let parsed = parse_source(&mut Located::new(input)).unwrap();
CommentReplaceIter {
text: input,
text_index: 0,
parsed,
parsed_index: 0,
}
}

@@ -108,14 +196,10 @@ not commented
";

assert_eq!(
INPUT
.lines()
.replace_comments()
.zip(INPUT.lines())
.find(|(line, original)| {
(line != "not commented" && !line.chars().all(|c| c == ' '))
|| line.len() != original.len()
}),
replace_comments(INPUT).find(|(line, original)| {
(line.trim_end() != "not commented" && !line.chars().all(|c| c == ' ' || c == '\n'))
|| line.len() != original.len()
}),
None
);

@@ -139,8 +223,7 @@ not commented
];

for &(input, expected) in PARTIAL_TESTS.iter() {
let mut nasty_processed = input.lines();
let nasty_processed = nasty_processed.replace_comments().next().unwrap();
let nasty_processed = replace_comments(input).next().unwrap().0;
assert_eq!(&nasty_processed, expected);
}
}
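As a side note on the winnow technique used in the parser above, here is a minimal standalone sketch (assuming winnow 0.6) of how `Located` plus `.span()` turns recognized input into byte ranges, which is what lets the comment parser report spans instead of slices:

```rust
use std::ops::Range;
use winnow::{ascii::till_line_ending, Located, PResult, Parser};

// Recognize `//` plus the rest of the line and return its byte span in the input.
fn line_comment_span(input: &mut Located<&str>) -> PResult<Range<usize>> {
    ("//", till_line_ending).span().parse_next(input)
}

fn main() {
    let mut input = Located::new("// a comment\nlet x = 1;");
    let span = line_comment_span(&mut input).unwrap();
    assert_eq!(span, 0..12); // the comment occupies the first 12 bytes
}
```

Returning spans rather than slices is what lets the iterator wrapper above splice replacement spaces back into the original text without copying it a second time.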
25 changes: 11 additions & 14 deletions src/compose/preprocess.rs
@@ -4,7 +4,7 @@ use indexmap::IndexMap;
use regex::Regex;

use super::{
comment_strip_iter::CommentReplaceExt,
comment_strip_iter::replace_comments,
parse_imports::{parse_imports, substitute_identifiers},
ComposerErrorInner, ImportDefWithOffset, ShaderDefValue,
};
@@ -245,8 +245,7 @@ impl Preprocessor {
let len = shader_str.len();

// this code broadly stolen from bevy_render::ShaderProcessor
let mut lines = shader_str.lines();
let mut lines = lines.replace_comments().zip(shader_str.lines()).peekable();
let mut lines = replace_comments(shader_str).peekable();

while let Some((mut line, original_line)) = lines.next() {
let mut output = false;
@@ -271,8 +270,9 @@

loop {
// output spaces for removed lines to keep spans consistent (errors report against substituted_source, which is not preprocessed)
final_string.extend(std::iter::repeat(" ").take(line.len()));
offset += line.len() + 1;
final_string
.extend(std::iter::repeat(" ").take(line.len().saturating_sub(1)));
offset += line.len();

// PERF: Ideally we don't do multiple `match_indices` passes over `line`
// in addition to the final pass for the import parse
@@ -283,7 +283,6 @@
// let import_lines = &shader_str[initial_offset..offset]
// but we need the comments removed, and the iterator approach doesn't make that easy
import_lines.push_str(&line);
import_lines.push('\n');

if open_count == 0 || lines.peek().is_none() {
break;
@@ -356,15 +355,15 @@
final_string.push_str(&item_replaced_line);
let diff = line.len().saturating_sub(item_replaced_line.len());
final_string.extend(std::iter::repeat(" ").take(diff));
offset += original_line.len() + 1;
offset += original_line.len();
output = true;
}
}

if !output {
// output spaces for removed lines to keep spans consistent (errors report against substituted_source, which is not preprocessed)
final_string.extend(std::iter::repeat(" ").take(line.len()));
offset += line.len() + 1;
final_string.extend(std::iter::repeat(" ").take(line.len().saturating_sub(1)));
offset += line.len();
}
final_string.push('\n');
}
@@ -398,10 +397,9 @@ impl Preprocessor {
let mut defines = HashMap::default();
let mut effective_defs = HashSet::default();

let mut lines = shader_str.lines();
let mut lines = lines.replace_comments().peekable();
let mut lines = replace_comments(shader_str).peekable();

while let Some(mut line) = lines.next() {
while let Some((mut line, _)) = lines.next() {
let (is_scope, def) = self.check_scope(&HashMap::default(), &line, None, offset)?;

if is_scope {
@@ -423,7 +421,6 @@
// let import_lines = &shader_str[initial_offset..offset]
// but we need the comments removed, and the iterator approach doesn't make that easy
import_lines.push_str(&line);
import_lines.push('\n');

if open_count == 0 || lines.peek().is_none() {
break;
@@ -432,7 +429,7 @@
// output spaces for removed lines to keep spans consistent (errors report against substituted_source, which is not preprocessed)
offset += line.len() + 1;

line = lines.next().unwrap();
line = lines.next().unwrap().0;
}

parse_imports(import_lines.as_str(), &mut declared_imports).map_err(