From 8c3d206aabf18b6e999e3d22660bc56b03209574 Mon Sep 17 00:00:00 2001 From: zyk <474964724@qq.com> Date: Sun, 14 Apr 2024 21:12:11 +0800 Subject: [PATCH 1/2] Add a match for the combination of OpeningBracket and Pard in the parse_header method --- src/lexer.rs | 2 +- src/parser.rs | 13 ++++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index 02fb8fb..af5f261 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -154,7 +154,7 @@ impl Lexer { #[cfg(test)] pub(crate) mod tests { use crate::lexer::Lexer; - use crate::tokens::ControlWord::{Ansi, Bold, ColorBlue, ColorGreen, ColorNumber, ColorRed, FontNumber, FontSize, FontTable, Italic, Par, Pard, Rtf, Underline, Unknown}; + use crate::tokens::ControlWord::{Ansi, Bold, ColorBlue, ColorNumber, ColorRed, FontNumber, FontSize, FontTable, Italic, Par, Pard, Rtf, Underline, Unknown}; use crate::tokens::Property::*; use crate::tokens::Token::*; diff --git a/src/parser.rs b/src/parser.rs index 8c2e517..ef5800c 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -277,6 +277,10 @@ impl<'a> Parser<'a> { break; } } + (Some(Token::OpeningBracket), Some(header_control_word!(Pard))) => { + self.tokens.insert(self.cursor, Token::OpeningBracket); + break; + } // Break on par, pard, sectd, or plain - We no longer are in the header (Some(header_control_word!(Pard) | header_control_word!(Sectd) | header_control_word!(Plain) | header_control_word!(Par)), _) => break, // Break if it declares a font after the font table --> no more in the header @@ -664,9 +668,16 @@ pub mod tests { \pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\pardirnatural\partightenfactor0 \f0\fs24 \cf0 \uc0\u21834 \u21834 }"#; - // \f0\fs24 \cf0 \uc0\u21834 \u21834 }"#; let tokens = Lexer::scan(rtf).unwrap(); let document = Parser::new(tokens).parse().unwrap(); assert_eq!(&document.body[0].text, "啊 啊"); } + + #[test] + fn parse_opening_bracket_and_pard() { + let rtf = 
r#"{\rtf1\ansi\deff0{\fonttbl {\f0\fnil\fcharset0 Calibri;}{\f1\fnil\fcharset2 Symbol;}}{\colortbl ;}{\pard a\sb70\par}}"#; + let tokens = Lexer::scan(rtf).unwrap(); + let document = Parser::new(tokens).parse().unwrap(); + assert_eq!(&document.body[0].text, "a"); + } } From 2d1f0cb3ea7254ec074dd3ebf4cc85c5144dc546 Mon Sep 17 00:00:00 2001 From: zyk <474964724@qq.com> Date: Sun, 14 Apr 2024 22:51:08 +0800 Subject: [PATCH 2/2] add pard, sectd, plain, par with OpeningBracket option in parse_header --- src/parser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.rs b/src/parser.rs index ef5800c..6b9468e 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -277,7 +277,7 @@ impl<'a> Parser<'a> { break; } } - (Some(Token::OpeningBracket), Some(header_control_word!(Pard))) => { + (Some(Token::OpeningBracket), Some(header_control_word!(Pard) | header_control_word!(Sectd) | header_control_word!(Plain) | header_control_word!(Par))) => { self.tokens.insert(self.cursor, Token::OpeningBracket); break; }