Skip to content

Commit

Permalink
Allow spaces in URL requirements
Browse files Browse the repository at this point in the history
  • Loading branch information
charliermarsh committed Sep 29, 2024
1 parent ada6b36 commit e548c81
Show file tree
Hide file tree
Showing 10 changed files with 398 additions and 47 deletions.
68 changes: 57 additions & 11 deletions crates/pep508-rs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -834,11 +834,19 @@ fn parse_extras_cursor<T: Pep508Url>(
/// Parse a raw string for a URL requirement, which could be either a URL or a local path, and which
/// could contain unexpanded environment variables.
///
/// When parsing, we eat characters until we see any of the following:
/// - A newline.
/// - A semicolon (marker) or hash (comment), _preceded_ by a space. We parse the URL until the last
/// non-whitespace character (inclusive).
/// - A semicolon (marker) or hash (comment) _followed_ by a space. We treat this as an error, since
/// the end of the URL is ambiguous.
///
/// For example:
/// - `https://pypi.org/project/requests/...`
/// - `file:///home/ferris/project/scripts/...`
/// - `file:../editable/`
/// - `../editable/`
/// - `../path to editable/`
/// - `https://download.pytorch.org/whl/torch_stable.html`
fn parse_url<T: Pep508Url>(
cursor: &mut Cursor,
Expand All @@ -847,7 +855,40 @@ fn parse_url<T: Pep508Url>(
// wsp*
cursor.eat_whitespace();
// <URI_reference>
let (start, len) = cursor.take_while(|char| !char.is_whitespace());
let (start, len) = {
let start = cursor.pos();
let mut len = 0;
while let Some((_, c)) = cursor.next() {
// If we see a line break, we're done.
if matches!(c, '\r' | '\n') {
break;
}

// If we see top-level whitespace, check if it's followed by a semicolon or hash. If so,
// end the URL at the last non-whitespace character.
if c.is_whitespace() {
let mut cursor = cursor.clone();
cursor.eat_whitespace();
if matches!(cursor.peek_char(), None | Some(';' | '#')) {
break;
}
}

len += c.len_utf8();

// If we see a top-level semicolon or hash followed by whitespace, we're done.
match c {
';' if cursor.peek_char().is_some_and(char::is_whitespace) => {
break;
}
'#' if cursor.peek_char().is_some_and(char::is_whitespace) => {
break;
}
_ => {}
}
}
(start, len)
};
let url = cursor.slice(start, len);
if url.is_empty() {
return Err(Pep508Error {
Expand Down Expand Up @@ -1087,16 +1128,21 @@ fn parse_pep508_requirement<T: Pep508Url>(
// wsp*
cursor.eat_whitespace();
if let Some((pos, char)) = cursor.next() {
if let Some(VersionOrUrl::Url(url)) = requirement_kind {
if marker.is_none() && url.to_string().ends_with(';') {
return Err(Pep508Error {
message: Pep508ErrorSource::String(
"Missing space before ';', the end of the URL is ambiguous".to_string(),
),
start: requirement_end - ';'.len_utf8(),
len: ';'.len_utf8(),
input: cursor.to_string(),
});
if marker.is_none() {
if let Some(VersionOrUrl::Url(url)) = requirement_kind {
let url = url.to_string();
for c in [';', '#'] {
if url.ends_with(c) {
return Err(Pep508Error {
message: Pep508ErrorSource::String(format!(
"Missing space before '{c}', the end of the URL is ambiguous"
)),
start: requirement_end - c.len_utf8(),
len: c.len_utf8(),
input: cursor.to_string(),
});
}
}
}
}
let message = if marker.is_none() {
Expand Down
75 changes: 52 additions & 23 deletions crates/pep508-rs/src/unnamed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,16 +175,20 @@ fn parse_unnamed_requirement<Url: UnnamedRequirementUrl>(
// wsp*
cursor.eat_whitespace();
if let Some((pos, char)) = cursor.next() {
if let Some(given) = url.given() {
if given.ends_with(';') && marker.is_none() {
return Err(Pep508Error {
message: Pep508ErrorSource::String(
"Missing space before ';', the end of the URL is ambiguous".to_string(),
),
start: requirement_end - ';'.len_utf8(),
len: ';'.len_utf8(),
input: cursor.to_string(),
});
if marker.is_none() {
if let Some(given) = url.given() {
for c in [';', '#'] {
if given.ends_with(c) {
return Err(Pep508Error {
message: Pep508ErrorSource::String(format!(
"Missing space before '{c}', the end of the URL is ambiguous"
)),
start: requirement_end - c.len_utf8(),
len: c.len_utf8(),
input: cursor.to_string(),
});
}
}
}
}
let message = if marker.is_none() {
Expand Down Expand Up @@ -349,9 +353,20 @@ fn preprocess_unnamed_url<Url: UnnamedRequirementUrl>(
/// Like [`crate::parse_url`], but allows for extras to be present at the end of the URL, to comply
/// with the non-PEP 508 extensions.
///
/// When parsing, we eat characters until we see any of the following:
/// - A newline.
/// - A semicolon (marker) or hash (comment), _preceded_ by a space. We parse the URL until the last
/// non-whitespace character (inclusive).
/// - A semicolon (marker) or hash (comment) _followed_ by a space. We treat this as an error, since
/// the end of the URL is ambiguous.
///
/// URLs can include extras at the end, enclosed in square brackets.
///
/// For example:
/// - `https://download.pytorch.org/whl/torch_stable.html[dev]`
/// - `../editable[dev]`
/// - `https://download.pytorch.org/whl/torch_stable.html ; python_version > "3.8"`
/// - `https://download.pytorch.org/whl/torch_stable.html # this is a comment`
fn parse_unnamed_url<Url: UnnamedRequirementUrl>(
cursor: &mut Cursor,
working_dir: Option<&Path>,
Expand All @@ -363,30 +378,44 @@ fn parse_unnamed_url<Url: UnnamedRequirementUrl>(
let (start, len) = {
let start = cursor.pos();
let mut len = 0;
let mut backslash = false;
let mut depth = 0u32;
while let Some((_, c)) = cursor.next() {
if backslash {
backslash = false;
} else if c == '\\' {
backslash = true;
} else if c == '[' {
// If we see a line break, we're done.
if matches!(c, '\r' | '\n') {
break;
}

// Track the depth of brackets.
if c == '[' {
depth = depth.saturating_add(1);
} else if c == ']' {
depth = depth.saturating_sub(1);
}

// If we see top-level whitespace, we're done.
// If we see top-level whitespace, check if it's followed by a semicolon or hash. If so,
// end the URL at the last non-whitespace character.
if depth == 0 && c.is_whitespace() {
break;
}

// If we see a line break, we're done.
if matches!(c, '\r' | '\n') {
break;
let mut cursor = cursor.clone();
cursor.eat_whitespace();
if matches!(cursor.peek_char(), None | Some(';' | '#')) {
break;
}
}

len += c.len_utf8();

// If we see a top-level semicolon or hash followed by whitespace, we're done.
if depth == 0 {
match c {
';' if cursor.peek_char().is_some_and(char::is_whitespace) => {
break;
}
'#' if cursor.peek_char().is_some_and(char::is_whitespace) => {
break;
}
_ => {}
}
}
}
(start, len)
};
Expand Down
1 change: 1 addition & 0 deletions crates/requirements-txt/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1325,6 +1325,7 @@ mod test {

#[cfg(unix)]
#[test_case(Path::new("semicolon.txt"))]
#[test_case(Path::new("hash.txt"))]
#[tokio::test]
async fn parse_err(path: &Path) {
let working_dir = workspace_test_data_dir().join("requirements-txt");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,156 @@ RequirementsTxt {
),
hashes: [],
},
RequirementEntry {
requirement: Unnamed(
UnnamedRequirement {
url: VerbatimParsedUrl {
parsed_url: Directory(
ParsedDirectoryUrl {
url: Url {
scheme: "file",
cannot_be_a_base: false,
username: "",
password: None,
host: None,
port: None,
path: "<REQUIREMENTS_DIR>/scripts/packages/black%20editable",
query: None,
fragment: None,
},
install_path: "<REQUIREMENTS_DIR>/scripts/packages/black editable",
editable: false,
virtual: false,
},
),
verbatim: VerbatimUrl {
url: Url {
scheme: "file",
cannot_be_a_base: false,
username: "",
password: None,
host: None,
port: None,
path: "<REQUIREMENTS_DIR>/scripts/packages/black%20editable",
query: None,
fragment: None,
},
given: Some(
"./scripts/packages/black editable",
),
},
},
extras: [],
marker: true,
origin: Some(
File(
"<REQUIREMENTS_DIR>/bare-url.txt",
),
),
},
),
hashes: [],
},
RequirementEntry {
requirement: Unnamed(
UnnamedRequirement {
url: VerbatimParsedUrl {
parsed_url: Directory(
ParsedDirectoryUrl {
url: Url {
scheme: "file",
cannot_be_a_base: false,
username: "",
password: None,
host: None,
port: None,
path: "<REQUIREMENTS_DIR>/scripts/packages/black%20editable",
query: None,
fragment: None,
},
install_path: "<REQUIREMENTS_DIR>/scripts/packages/black editable",
editable: false,
virtual: false,
},
),
verbatim: VerbatimUrl {
url: Url {
scheme: "file",
cannot_be_a_base: false,
username: "",
password: None,
host: None,
port: None,
path: "<REQUIREMENTS_DIR>/scripts/packages/black%20editable",
query: None,
fragment: None,
},
given: Some(
"./scripts/packages/black editable",
),
},
},
extras: [],
marker: python_full_version >= '3.9',
origin: Some(
File(
"<REQUIREMENTS_DIR>/bare-url.txt",
),
),
},
),
hashes: [],
},
RequirementEntry {
requirement: Unnamed(
UnnamedRequirement {
url: VerbatimParsedUrl {
parsed_url: Directory(
ParsedDirectoryUrl {
url: Url {
scheme: "file",
cannot_be_a_base: false,
username: "",
password: None,
host: None,
port: None,
path: "<REQUIREMENTS_DIR>/scripts/packages/black%20editable",
query: None,
fragment: None,
},
install_path: "<REQUIREMENTS_DIR>/scripts/packages/black editable",
editable: false,
virtual: false,
},
),
verbatim: VerbatimUrl {
url: Url {
scheme: "file",
cannot_be_a_base: false,
username: "",
password: None,
host: None,
port: None,
path: "<REQUIREMENTS_DIR>/scripts/packages/black%20editable",
query: None,
fragment: None,
},
given: Some(
"./scripts/packages/black editable",
),
},
},
extras: [],
marker: true,
origin: Some(
File(
"<REQUIREMENTS_DIR>/bare-url.txt",
),
),
},
),
hashes: [],
},
],
constraints: [],
editables: [],
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
---
source: crates/requirements-txt/src/lib.rs
expression: actual
---
RequirementsTxtFileError {
file: "<REQUIREMENTS_DIR>/hash.txt",
error: Pep508 {
source: Pep508Error {
message: String(
"Missing space before '#', the end of the URL is ambiguous",
),
start: 10,
len: 1,
input: "./editable# comment",
},
start: 49,
end: 68,
},
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ RequirementsTxtFileError {
message: String(
"Missing space before ';', the end of the URL is ambiguous",
),
start: 11,
start: 10,
len: 1,
input: "./editable; python_version >= \"3.9\" and os_name == \"posix\"",
},
Expand Down
Loading

0 comments on commit e548c81

Please sign in to comment.