Merge pull request #13 from zyk-mjzs/master

Fixed the issue of text being inserted twice when a Unicode control word is followed by spaces and characters
d0rianb · Apr 21, 2024 · 6ce3cf9 · 6ce3cf9
2 parents 7651b49 + 69338ee
commit 6ce3cf9
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 3 deletions.
diff --git a/src/lexer.rs b/src/lexer.rs
@@ -124,7 +124,8 @@ impl Lexer {
                     recursive_tokenize!(tail, ret);
 
                     // \u1234 \u1234 is ok, but \u1234  \u1234 loses a space, \u1234   \u1234 loses two spaces, and so on
-                    if control_word.0 == ControlWord::Unicode && tail.len() > 0 {
+                    // e.g. `\u1234  1`: no need to enter this branch, the tail is handled as plain text
+                    if control_word.0 == ControlWord::Unicode && tail.len() > 0 && tail.trim() == "" {
                         ret.push(Token::PlainText(tail));
                     }
                     return Ok(ret);

diff --git a/src/parser.rs b/src/parser.rs
@@ -667,10 +667,10 @@ pub mod tests {
             \paperw11900\paperh16840\margl1440\margr1440\vieww11520\viewh8400\viewkind0
             \pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\pardirnatural\partightenfactor0
             
-            \f0\fs24 \cf0 \uc0\u21834  \u21834 }"#;
+            \f0\fs24 \cf0 \uc0\u21834  1\u21834   1 2 }"#;
         let tokens = Lexer::scan(rtf).unwrap();
         let document = Parser::new(tokens).parse().unwrap();
-        assert_eq!(&document.body[0].text, "啊 啊");
+        assert_eq!(&document.body[0].text, "啊 1啊  1 2 ");
     }
 
     #[test]