Skip to content

Commit

Permalink
Allow spaces in URL requirements
Browse files Browse the repository at this point in the history
  • Loading branch information
charliermarsh committed Sep 28, 2024
1 parent 9312a08 commit 44f2729
Show file tree
Hide file tree
Showing 8 changed files with 222 additions and 26 deletions.
14 changes: 0 additions & 14 deletions crates/pep508-rs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1054,8 +1054,6 @@ fn parse_pep508_requirement<T: Pep508Url>(
}
};

let requirement_end = cursor.pos();

// If the requirement consists solely of a package name, and that name appears to be an archive,
// treat it as a URL requirement, for consistency and security. (E.g., `requests-2.26.0.tar.gz`
// is a valid Python package name, but we should treat it as a reference to a file.)
Expand Down Expand Up @@ -1087,18 +1085,6 @@ fn parse_pep508_requirement<T: Pep508Url>(
// wsp*
cursor.eat_whitespace();
if let Some((pos, char)) = cursor.next() {
if let Some(VersionOrUrl::Url(url)) = requirement_kind {
if marker.is_none() && url.to_string().ends_with(';') {
return Err(Pep508Error {
message: Pep508ErrorSource::String(
"Missing space before ';', the end of the URL is ambiguous".to_string(),
),
start: requirement_end - ';'.len_utf8(),
len: ';'.len_utf8(),
input: cursor.to_string(),
});
}
}
let message = if marker.is_none() {
format!(r#"Expected end of input or `;`, found `{char}`"#)
} else {
Expand Down
57 changes: 46 additions & 11 deletions crates/pep508-rs/src/unnamed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,11 @@ fn preprocess_unnamed_url<Url: UnnamedRequirementUrl>(
/// Like [`crate::parse_url`], but allows for extras to be present at the end of the URL, to comply
/// with the non-PEP 508 extensions.
///
/// When parsing, we take characters until we see any of the following:
/// - A newline
/// - A semicolon (marker) or hash (comment), _preceded_ by a space.
/// - A semicolon (marker) or hash (comment) _followed_ by a space.
///
/// For example:
/// - `https://download.pytorch.org/whl/torch_stable.html[dev]`
/// - `../editable[dev]`
Expand All @@ -363,27 +368,57 @@ fn parse_unnamed_url<Url: UnnamedRequirementUrl>(
let (start, len) = {
let start = cursor.pos();
let mut len = 0;
let mut backslash = false;
let mut depth = 0u32;
while let Some((_, c)) = cursor.next() {
if backslash {
backslash = false;
} else if c == '\\' {
backslash = true;
} else if c == '[' {
// If we see a line break, we're done.
if matches!(c, '\r' | '\n') {
break;
}

// Track the depth of brackets.
if c == '[' {
depth = depth.saturating_add(1);
} else if c == ']' {
depth = depth.saturating_sub(1);
}

// If we see top-level whitespace, we're done.
// If we see top-level whitespace, check if it's followed by a semicolon or hash. If so,
// end the URL at the last non-whitespace character.
if depth == 0 && c.is_whitespace() {
break;
let mut cursor = cursor.clone();
cursor.eat_whitespace();
if matches!(cursor.peek_char(), None | Some(';' | '#')) {
break;
}
}

// If we see a line break, we're done.
if matches!(c, '\r' | '\n') {
break;
// If we see a top-level semicolon or hash followed by whitespace, we're done.
if depth == 0 && cursor.peek_char().is_some_and(char::is_whitespace) {
match c {
';' => {
return Err(Pep508Error {
message: Pep508ErrorSource::String(
"Missing space before ';', the end of the URL is ambiguous"
.to_string(),
),
start: start + len,
len: ';'.len_utf8(),
input: cursor.to_string(),
});
}
'#' => {
return Err(Pep508Error {
message: Pep508ErrorSource::String(
"Missing space before '#', the end of the URL is ambiguous"
.to_string(),
),
start: start + len,
len: '#'.len_utf8(),
input: cursor.to_string(),
});
}
_ => {}
}
}

len += c.len_utf8();
Expand Down
1 change: 1 addition & 0 deletions crates/requirements-txt/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1325,6 +1325,7 @@ mod test {

#[cfg(unix)]
#[test_case(Path::new("semicolon.txt"))]
#[test_case(Path::new("hash.txt"))]
#[tokio::test]
async fn parse_err(path: &Path) {
let working_dir = workspace_test_data_dir().join("requirements-txt");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,156 @@ RequirementsTxt {
),
hashes: [],
},
RequirementEntry {
requirement: Unnamed(
UnnamedRequirement {
url: VerbatimParsedUrl {
parsed_url: Directory(
ParsedDirectoryUrl {
url: Url {
scheme: "file",
cannot_be_a_base: false,
username: "",
password: None,
host: None,
port: None,
path: "<REQUIREMENTS_DIR>/scripts/packages/black%20editable",
query: None,
fragment: None,
},
install_path: "<REQUIREMENTS_DIR>/scripts/packages/black editable",
editable: false,
virtual: false,
},
),
verbatim: VerbatimUrl {
url: Url {
scheme: "file",
cannot_be_a_base: false,
username: "",
password: None,
host: None,
port: None,
path: "<REQUIREMENTS_DIR>/scripts/packages/black%20editable",
query: None,
fragment: None,
},
given: Some(
"./scripts/packages/black editable",
),
},
},
extras: [],
marker: true,
origin: Some(
File(
"<REQUIREMENTS_DIR>/bare-url.txt",
),
),
},
),
hashes: [],
},
RequirementEntry {
requirement: Unnamed(
UnnamedRequirement {
url: VerbatimParsedUrl {
parsed_url: Directory(
ParsedDirectoryUrl {
url: Url {
scheme: "file",
cannot_be_a_base: false,
username: "",
password: None,
host: None,
port: None,
path: "<REQUIREMENTS_DIR>/scripts/packages/black%20editable",
query: None,
fragment: None,
},
install_path: "<REQUIREMENTS_DIR>/scripts/packages/black editable",
editable: false,
virtual: false,
},
),
verbatim: VerbatimUrl {
url: Url {
scheme: "file",
cannot_be_a_base: false,
username: "",
password: None,
host: None,
port: None,
path: "<REQUIREMENTS_DIR>/scripts/packages/black%20editable",
query: None,
fragment: None,
},
given: Some(
"./scripts/packages/black editable",
),
},
},
extras: [],
marker: python_full_version >= '3.9',
origin: Some(
File(
"<REQUIREMENTS_DIR>/bare-url.txt",
),
),
},
),
hashes: [],
},
RequirementEntry {
requirement: Unnamed(
UnnamedRequirement {
url: VerbatimParsedUrl {
parsed_url: Directory(
ParsedDirectoryUrl {
url: Url {
scheme: "file",
cannot_be_a_base: false,
username: "",
password: None,
host: None,
port: None,
path: "<REQUIREMENTS_DIR>/scripts/packages/black%20editable",
query: None,
fragment: None,
},
install_path: "<REQUIREMENTS_DIR>/scripts/packages/black editable",
editable: false,
virtual: false,
},
),
verbatim: VerbatimUrl {
url: Url {
scheme: "file",
cannot_be_a_base: false,
username: "",
password: None,
host: None,
port: None,
path: "<REQUIREMENTS_DIR>/scripts/packages/black%20editable",
query: None,
fragment: None,
},
given: Some(
"./scripts/packages/black editable",
),
},
},
extras: [],
marker: true,
origin: Some(
File(
"<REQUIREMENTS_DIR>/bare-url.txt",
),
),
},
),
hashes: [],
},
],
constraints: [],
editables: [],
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
---
source: crates/requirements-txt/src/lib.rs
expression: actual
---
RequirementsTxtFileError {
file: "<REQUIREMENTS_DIR>/hash.txt",
error: Pep508 {
source: Pep508Error {
message: String(
"Missing space before '#', the end of the URL is ambiguous",
),
start: 10,
len: 1,
input: "./editable# comment",
},
start: 49,
end: 68,
},
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ RequirementsTxtFileError {
message: String(
"Missing space before ';', the end of the URL is ambiguous",
),
start: 11,
start: 10,
len: 1,
input: "./editable; python_version >= \"3.9\" and os_name == \"posix\"",
},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
./scripts/packages/black_editable
./scripts/packages/black_editable[dev]
file:///scripts/packages/black_editable
./scripts/packages/black editable
./scripts/packages/black editable ; python_version >= "3.9"
./scripts/packages/black editable # comment
2 changes: 2 additions & 0 deletions crates/requirements-txt/test-data/requirements-txt/hash.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Disallowed (missing whitespace before hash)
-e ./editable# comment

0 comments on commit 44f2729

Please sign in to comment.