diff --git a/crates/pep508-rs/src/lib.rs b/crates/pep508-rs/src/lib.rs index 01d1e40b58b4..3b23db7ee059 100644 --- a/crates/pep508-rs/src/lib.rs +++ b/crates/pep508-rs/src/lib.rs @@ -1054,8 +1054,6 @@ fn parse_pep508_requirement( } }; - let requirement_end = cursor.pos(); - // If the requirement consists solely of a package name, and that name appears to be an archive, // treat it as a URL requirement, for consistency and security. (E.g., `requests-2.26.0.tar.gz` // is a valid Python package name, but we should treat it as a reference to a file.) @@ -1087,18 +1085,6 @@ fn parse_pep508_requirement( // wsp* cursor.eat_whitespace(); if let Some((pos, char)) = cursor.next() { - if let Some(VersionOrUrl::Url(url)) = requirement_kind { - if marker.is_none() && url.to_string().ends_with(';') { - return Err(Pep508Error { - message: Pep508ErrorSource::String( - "Missing space before ';', the end of the URL is ambiguous".to_string(), - ), - start: requirement_end - ';'.len_utf8(), - len: ';'.len_utf8(), - input: cursor.to_string(), - }); - } - } let message = if marker.is_none() { format!(r#"Expected end of input or `;`, found `{char}`"#) } else { diff --git a/crates/pep508-rs/src/unnamed.rs b/crates/pep508-rs/src/unnamed.rs index 06c4757438c3..3dd848ee0698 100644 --- a/crates/pep508-rs/src/unnamed.rs +++ b/crates/pep508-rs/src/unnamed.rs @@ -349,9 +349,20 @@ fn preprocess_unnamed_url( /// Like [`crate::parse_url`], but allows for extras to be present at the end of the URL, to comply /// with the non-PEP 508 extensions. /// +/// When parsing, we eat characters until we see any of the following: +/// - A newline. +/// - A semicolon (marker) or hash (comment), _preceded_ by a space. We parse the URL until the last +/// non-whitespace character (inclusive). +/// - A semicolon (marker) or hash (comment) _followed_ by a space. We treat this as an error, since +/// the end of the URL is ambiguous. +/// +/// URLs can include extras at the end, enclosed in square brackets. +/// /// For example: /// - `https://download.pytorch.org/whl/torch_stable.html[dev]` /// - `../editable[dev]` +/// - `https://download.pytorch.org/whl/torch_stable.html ; python_version > "3.8"` +/// - `https://download.pytorch.org/whl/torch_stable.html # this is a comment` fn parse_unnamed_url( cursor: &mut Cursor, working_dir: Option<&Path>, @@ -363,27 +374,57 @@ fn parse_unnamed_url( let (start, len) = { let start = cursor.pos(); let mut len = 0; - let mut backslash = false; let mut depth = 0u32; while let Some((_, c)) = cursor.next() { - if backslash { - backslash = false; - } else if c == '\\' { - backslash = true; - } else if c == '[' { + // If we see a line break, we're done. + if matches!(c, '\r' | '\n') { + break; + } + + // Track the depth of brackets. + if c == '[' { depth = depth.saturating_add(1); } else if c == ']' { depth = depth.saturating_sub(1); } - // If we see top-level whitespace, we're done. + // If we see top-level whitespace, check if it's followed by a semicolon or hash. If so, + // end the URL at the last non-whitespace character. if depth == 0 && c.is_whitespace() { - break; + let mut cursor = cursor.clone(); + cursor.eat_whitespace(); + if matches!(cursor.peek_char(), None | Some(';' | '#')) { + break; + } } - // If we see a line break, we're done. - if matches!(c, '\r' | '\n') { - break; + // If we see a top-level semicolon or hash followed by whitespace, we're done. + if depth == 0 { + match c { + ';' if cursor.peek_char().is_some_and(char::is_whitespace) => { + return Err(Pep508Error { + message: Pep508ErrorSource::String( + "Missing space before ';', the end of the URL is ambiguous" + .to_string(), + ), + start: start + len, + len: ';'.len_utf8(), + input: cursor.to_string(), + }); + } + '#' if cursor.peek_char().is_some_and(char::is_whitespace) => { + return Err(Pep508Error { + message: Pep508ErrorSource::String( + "Missing space before '#', the end of the URL is ambiguous" + .to_string(), + ), + start: start + len, + len: '#'.len_utf8(), + input: cursor.to_string(), + }); + } + _ => {} + } } len += c.len_utf8(); diff --git a/crates/requirements-txt/src/lib.rs b/crates/requirements-txt/src/lib.rs index 5a7fd7cb377c..93f71731db46 100644 --- a/crates/requirements-txt/src/lib.rs +++ b/crates/requirements-txt/src/lib.rs @@ -1325,6 +1325,7 @@ mod test { #[cfg(unix)] #[test_case(Path::new("semicolon.txt"))] + #[test_case(Path::new("hash.txt"))] #[tokio::test] async fn parse_err(path: &Path) { let working_dir = workspace_test_data_dir().join("requirements-txt"); diff --git a/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-unix-bare-url.txt.snap b/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-unix-bare-url.txt.snap index c74fdedd8fdf..7689453b66ba 100644 --- a/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-unix-bare-url.txt.snap +++ b/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-unix-bare-url.txt.snap @@ -158,6 +158,156 @@ RequirementsTxt { ), hashes: [], }, + RequirementEntry { + requirement: Unnamed( + UnnamedRequirement { + url: VerbatimParsedUrl { + parsed_url: Directory( + ParsedDirectoryUrl { + url: Url { + scheme: "file", + cannot_be_a_base: false, + username: "", + password: None, + host: None, + port: None, + path: "/scripts/packages/black%20editable", + query: None, + fragment: None, + }, + install_path: "/scripts/packages/black editable", + editable: false, + virtual: false, + }, + ), + verbatim: VerbatimUrl { + url: Url { + scheme: "file", + cannot_be_a_base: false, + username: "", + password: None, + host: None, + port: None, + path: "/scripts/packages/black%20editable", + query: None, + fragment: None, + }, + given: Some( + "./scripts/packages/black editable", + ), + }, + }, + extras: [], + marker: true, + origin: Some( + File( + "/bare-url.txt", + ), + ), + }, + ), + hashes: [], + }, + RequirementEntry { + requirement: Unnamed( + UnnamedRequirement { + url: VerbatimParsedUrl { + parsed_url: Directory( + ParsedDirectoryUrl { + url: Url { + scheme: "file", + cannot_be_a_base: false, + username: "", + password: None, + host: None, + port: None, + path: "/scripts/packages/black%20editable", + query: None, + fragment: None, + }, + install_path: "/scripts/packages/black editable", + editable: false, + virtual: false, + }, + ), + verbatim: VerbatimUrl { + url: Url { + scheme: "file", + cannot_be_a_base: false, + username: "", + password: None, + host: None, + port: None, + path: "/scripts/packages/black%20editable", + query: None, + fragment: None, + }, + given: Some( + "./scripts/packages/black editable", + ), + }, + }, + extras: [], + marker: python_full_version >= '3.9', + origin: Some( + File( + "/bare-url.txt", + ), + ), + }, + ), + hashes: [], + }, + RequirementEntry { + requirement: Unnamed( + UnnamedRequirement { + url: VerbatimParsedUrl { + parsed_url: Directory( + ParsedDirectoryUrl { + url: Url { + scheme: "file", + cannot_be_a_base: false, + username: "", + password: None, + host: None, + port: None, + path: "/scripts/packages/black%20editable", + query: None, + fragment: None, + }, + install_path: "/scripts/packages/black editable", + editable: false, + virtual: false, + }, + ), + verbatim: VerbatimUrl { + url: Url { + scheme: "file", + cannot_be_a_base: false, + username: "", + password: None, + host: None, + port: None, + path: "/scripts/packages/black%20editable", + query: None, + fragment: None, + }, + given: Some( + "./scripts/packages/black editable", + ), + }, + }, + extras: [], + marker: true, + origin: Some( + File( + "/bare-url.txt", + ), + ), + }, + ), + hashes: [], + }, ], constraints: [], editables: [], diff --git a/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-unix-hash.txt.snap b/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-unix-hash.txt.snap new file mode 100644 index 000000000000..c023fcaa804e --- /dev/null +++ b/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-unix-hash.txt.snap @@ -0,0 +1,19 @@ +--- +source: crates/requirements-txt/src/lib.rs +expression: actual +--- +RequirementsTxtFileError { + file: "/hash.txt", + error: Pep508 { + source: Pep508Error { + message: String( + "Missing space before '#', the end of the URL is ambiguous", + ), + start: 10, + len: 1, + input: "./editable# comment", + }, + start: 49, + end: 68, + }, +} diff --git a/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-unix-semicolon.txt.snap b/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-unix-semicolon.txt.snap index 768f6fd9ff66..b35272fb749f 100644 --- a/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-unix-semicolon.txt.snap +++ b/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-unix-semicolon.txt.snap @@ -9,7 +9,7 @@ RequirementsTxtFileError { message: String( "Missing space before ';', the end of the URL is ambiguous", ), - start: 11, + start: 10, len: 1, input: "./editable; python_version >= \"3.9\" and os_name == \"posix\"", }, diff --git a/crates/requirements-txt/test-data/requirements-txt/bare-url.txt b/crates/requirements-txt/test-data/requirements-txt/bare-url.txt index 080038a8c86d..2fef7fd16a89 100644 --- a/crates/requirements-txt/test-data/requirements-txt/bare-url.txt +++ b/crates/requirements-txt/test-data/requirements-txt/bare-url.txt @@ -1,3 +1,6 @@ ./scripts/packages/black_editable ./scripts/packages/black_editable[dev] file:///scripts/packages/black_editable +./scripts/packages/black editable +./scripts/packages/black editable ; python_version >= "3.9" +./scripts/packages/black editable # comment diff --git a/crates/requirements-txt/test-data/requirements-txt/hash.txt b/crates/requirements-txt/test-data/requirements-txt/hash.txt new file mode 100644 index 000000000000..d3d026f1be74 --- /dev/null +++ b/crates/requirements-txt/test-data/requirements-txt/hash.txt @@ -0,0 +1,2 @@ +# Disallowed (missing whitespace before hash) +-e ./editable# comment