From 60677e5449a83c1aa88b542bdbb1499d0ed1ff32 Mon Sep 17 00:00:00 2001 From: Arthur Baars Date: Tue, 24 Sep 2024 16:20:50 +0200 Subject: [PATCH] Rust: fix panic when the last character in a range is multi-byte --- rust/extractor/src/translate/base.rs | 21 +++++++--- .../ql/test/extractor-tests/utf8/ast.expected | 39 +++++++++++++++++++ rust/ql/test/extractor-tests/utf8/ast.ql | 3 ++ .../extractor-tests/utf8/utf8-identifiers.rs | 12 ++++++ 4 files changed, 69 insertions(+), 6 deletions(-) create mode 100644 rust/ql/test/extractor-tests/utf8/ast.expected create mode 100644 rust/ql/test/extractor-tests/utf8/ast.ql create mode 100644 rust/ql/test/extractor-tests/utf8/utf8-identifiers.rs diff --git a/rust/extractor/src/translate/base.rs b/rust/extractor/src/translate/base.rs index 9bca314216423..8e80d967bec5a 100644 --- a/rust/extractor/src/translate/base.rs +++ b/rust/extractor/src/translate/base.rs @@ -70,12 +70,21 @@ impl Translator { } pub fn location(&self, range: TextRange) -> (LineCol, LineCol) { let start = self.line_index.line_col(range.start()); - let end = self.line_index.line_col( - range - .end() - .checked_sub(TextSize::new(1)) - .unwrap_or(range.end()), - ); + let range_end = range.end(); + // QL end positions are inclusive, while TextRange offsets are exclusive and point at the position + // right after the last character of the range. We need to shift the end offset one character to the left to + // get the right inclusive QL position. Unfortunately, simply subtracting `1` from the end-offset may cause + // the offset to point in the middle of a multi-byte character, resulting in a `panic`. Therefore we use `try_line_col` + // with decreasing offsets to find the start of the last character included in the range. 
+ for i in 1..4 { + if let Some(end) = range_end + .checked_sub(i.into()) + .and_then(|x| self.line_index.try_line_col(x)) + { + return (start, end); + } + } + let end = self.line_index.line_col(range_end); (start, end) } pub fn emit_location(&mut self, label: Label, node: impl ast::AstNode) { diff --git a/rust/ql/test/extractor-tests/utf8/ast.expected b/rust/ql/test/extractor-tests/utf8/ast.expected new file mode 100644 index 0000000000000..1d50531f3bebf --- /dev/null +++ b/rust/ql/test/extractor-tests/utf8/ast.expected @@ -0,0 +1,39 @@ +| lib.rs:1:1:3:22 | SourceFile | +| lib.rs:2:1:2:8 | Module | +| lib.rs:2:5:2:7 | Name | +| lib.rs:3:1:3:8 | Module | +| lib.rs:3:5:3:8 | Name | +| lib.rs:3:10:3:20 | NameRef | +| lib.rs:3:10:3:20 | Path | +| lib.rs:3:10:3:20 | PathSegment | +| lib.rs:3:10:3:21 | MacroCall | +| utf8-identifiers.rs:1:1:4:6 | foo | +| utf8-identifiers.rs:1:1:12:2 | SourceFile | +| utf8-identifiers.rs:1:4:1:6 | Name | +| utf8-identifiers.rs:1:7:4:1 | GenericParamList | +| utf8-identifiers.rs:2:5:2:8 | Lifetime | +| utf8-identifiers.rs:2:5:2:8 | LifetimeParam | +| utf8-identifiers.rs:3:5:3:7 | Name | +| utf8-identifiers.rs:3:5:3:7 | TypeParam | +| utf8-identifiers.rs:4:2:4:3 | ParamList | +| utf8-identifiers.rs:4:5:4:6 | BlockExpr | +| utf8-identifiers.rs:4:5:4:6 | StmtList | +| utf8-identifiers.rs:6:1:8:1 | Struct | +| utf8-identifiers.rs:6:8:6:8 | Name | +| utf8-identifiers.rs:6:10:8:1 | RecordFieldList | +| utf8-identifiers.rs:7:5:7:7 | Name | +| utf8-identifiers.rs:7:5:7:13 | RecordField | +| utf8-identifiers.rs:7:9:7:13 | NameRef | +| utf8-identifiers.rs:7:9:7:13 | Path | +| utf8-identifiers.rs:7:9:7:13 | PathSegment | +| utf8-identifiers.rs:7:9:7:13 | PathType | +| utf8-identifiers.rs:10:1:10:3 | Visibility | +| utf8-identifiers.rs:10:1:12:1 | main | +| utf8-identifiers.rs:10:8:10:11 | Name | +| utf8-identifiers.rs:10:12:10:13 | ParamList | +| utf8-identifiers.rs:10:15:12:1 | BlockExpr | +| utf8-identifiers.rs:10:15:12:1 | StmtList | +| 
utf8-identifiers.rs:11:5:11:24 | LetStmt | +| utf8-identifiers.rs:11:9:11:11 | IdentPat | +| utf8-identifiers.rs:11:9:11:11 | Name | +| utf8-identifiers.rs:11:14:11:23 | LiteralExpr | diff --git a/rust/ql/test/extractor-tests/utf8/ast.ql b/rust/ql/test/extractor-tests/utf8/ast.ql new file mode 100644 index 0000000000000..420f7e94eb9aa --- /dev/null +++ b/rust/ql/test/extractor-tests/utf8/ast.ql @@ -0,0 +1,3 @@ +import codeql.rust.elements + +select any(AstNode n) diff --git a/rust/ql/test/extractor-tests/utf8/utf8-identifiers.rs b/rust/ql/test/extractor-tests/utf8/utf8-identifiers.rs new file mode 100644 index 0000000000000..579dc82ab5f06 --- /dev/null +++ b/rust/ql/test/extractor-tests/utf8/utf8-identifiers.rs @@ -0,0 +1,12 @@ +fn foo< + 'β, + γ +>() {} + +struct X { + δ: usize +} + +pub fn main() { + let α = 0.00001f64; +}