Skip to content

Commit

Permalink
Merge pull request #13 from zyk-mjzs/master
Browse files Browse the repository at this point in the history
Fixed the issue of spaces after a Unicode control word being lost and following characters being inserted twice
  • Loading branch information
d0rianb authored Apr 21, 2024
2 parents 7651b49 + 69338ee commit 6ce3cf9
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 3 deletions.
3 changes: 2 additions & 1 deletion src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ impl Lexer {
recursive_tokenize!(tail, ret);

// "\u1234 \u1234" (one space) is fine, but "\u1234  \u1234" (two spaces) loses one space, "\u1234   \u1234" (three spaces) loses two, and so on
if control_word.0 == ControlWord::Unicode && tail.len() > 0 {
// \u1234 1 -> No need to walk in here, it will enter plain text
if control_word.0 == ControlWord::Unicode && tail.len() > 0 && tail.trim() == "" {
ret.push(Token::PlainText(tail));
}
return Ok(ret);
Expand Down
4 changes: 2 additions & 2 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -667,10 +667,10 @@ pub mod tests {
\paperw11900\paperh16840\margl1440\margr1440\vieww11520\viewh8400\viewkind0
\pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\pardirnatural\partightenfactor0
\f0\fs24 \cf0 \uc0\u21834 \u21834 }"#;
\f0\fs24 \cf0 \uc0\u21834 1\u21834 1 2 }"#;
let tokens = Lexer::scan(rtf).unwrap();
let document = Parser::new(tokens).parse().unwrap();
assert_eq!(&document.body[0].text, "啊 ");
assert_eq!(&document.body[0].text, "啊 1啊 1 2 ");
}

#[test]
Expand Down

0 comments on commit 6ce3cf9

Please sign in to comment.