From 1734c7ed507e5ca7b69d6377da477c711e653974 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Sat, 27 Jul 2024 14:38:16 -0400 Subject: [PATCH] Use existing METADATA parser in wheel installer (#5508) --- Cargo.lock | 1 - crates/install-wheel-rs/Cargo.toml | 1 - crates/install-wheel-rs/src/linker.rs | 30 ++++++------ crates/install-wheel-rs/src/metadata.rs | 30 ------------ crates/install-wheel-rs/src/wheel.rs | 49 ++------------------ crates/pypi-types/src/metadata.rs | 59 ++++++++++++++++++++++++ crates/uv/src/commands/pip/operations.rs | 5 +- 7 files changed, 77 insertions(+), 98 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c68161995a0b..d92610ebf4df 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1821,7 +1821,6 @@ dependencies = [ "distribution-filename", "fs-err", "indoc", - "mailparse", "pathdiff", "pep440_rs", "platform-info", diff --git a/crates/install-wheel-rs/Cargo.toml b/crates/install-wheel-rs/Cargo.toml index 7ca22b8ed5c6..eec879177f6e 100644 --- a/crates/install-wheel-rs/Cargo.toml +++ b/crates/install-wheel-rs/Cargo.toml @@ -33,7 +33,6 @@ configparser = { workspace = true } csv = { workspace = true } data-encoding = { workspace = true } fs-err = { workspace = true } -mailparse = { workspace = true } pathdiff = { workspace = true } platform-info = { workspace = true } reflink-copy = { workspace = true } diff --git a/crates/install-wheel-rs/src/linker.rs b/crates/install-wheel-rs/src/linker.rs index 5eb48195ce21..1bc70aadf4b9 100644 --- a/crates/install-wheel-rs/src/linker.rs +++ b/crates/install-wheel-rs/src/linker.rs @@ -2,28 +2,25 @@ //! reading from a zip file. use std::path::{Path, PathBuf}; -use std::str::FromStr; use std::sync::{Arc, Mutex}; use std::time::SystemTime; use distribution_filename::WheelFilename; use fs_err as fs; use fs_err::{DirEntry, File}; -use pep440_rs::Version; -use pypi_types::DirectUrl; +use pypi_types::{DirectUrl, Metadata12}; use reflink_copy as reflink; use rustc_hash::FxHashMap; use serde::{Deserialize, Serialize}; use tempfile::tempdir_in; use tracing::{debug, instrument}; -use uv_normalize::PackageName; use uv_warnings::warn_user_once; use walkdir::WalkDir; use crate::script::{scripts_from_ini, Script}; use crate::wheel::{ - extra_dist_info, install_data, parse_metadata, parse_wheel_file, read_record_file, - write_script_entrypoints, LibKind, + extra_dist_info, install_data, parse_wheel_file, read_record_file, write_script_entrypoints, + LibKind, }; use crate::{Error, Layout}; @@ -49,16 +46,15 @@ pub fn install_wheel( ) -> Result<(), Error> { let dist_info_prefix = find_dist_info(&wheel)?; let metadata = dist_info_metadata(&dist_info_prefix, &wheel)?; - let (name, version) = parse_metadata(&dist_info_prefix, &metadata)?; + let Metadata12 { name, version, .. } = Metadata12::parse_metadata(&metadata) + .map_err(|err| Error::InvalidWheel(err.to_string()))?; // Validate the wheel name and version. { - let name = PackageName::from_str(&name)?; if name != filename.name { return Err(Error::MismatchedName(name, filename.name.clone())); } - let version = Version::from_str(&version)?; if version != filename.version && version != filename.version.clone().without_local() { return Err(Error::MismatchedVersion(version, filename.version.clone())); } @@ -76,13 +72,13 @@ pub fn install_wheel( // > 1.c If Root-Is-Purelib == ‘true’, unpack archive into purelib (site-packages). // > 1.d Else unpack archive into platlib (site-packages). - debug!(name, "Extracting file"); + debug!(?name, "Extracting file"); let site_packages = match lib_kind { LibKind::Pure => &layout.scheme.purelib, LibKind::Plat => &layout.scheme.platlib, }; let num_unpacked = link_mode.link_wheel_files(site_packages, &wheel, locks)?; - debug!(name, "Extracted {num_unpacked} files"); + debug!(?name, "Extracted {num_unpacked} files"); // Read the RECORD file. let mut record_file = File::open( @@ -96,9 +92,9 @@ pub fn install_wheel( parse_scripts(&wheel, &dist_info_prefix, None, layout.python_version.1)?; if console_scripts.is_empty() && gui_scripts.is_empty() { - debug!(name, "No entrypoints"); + debug!(?name, "No entrypoints"); } else { - debug!(name, "Writing entrypoints"); + debug!(?name, "Writing entrypoints"); fs_err::create_dir_all(&layout.scheme.scripts)?; write_script_entrypoints(layout, site_packages, &console_scripts, &mut record, false)?; @@ -109,7 +105,7 @@ pub fn install_wheel( // 2.b Move each subtree of distribution-1.0.data/ onto its destination path. Each subdirectory of distribution-1.0.data/ is a key into a dict of destination directories, such as distribution-1.0.data/(purelib|platlib|headers|scripts|data). The initially supported paths are taken from distutils.command.install. let data_dir = site_packages.join(format!("{dist_info_prefix}.data")); if data_dir.is_dir() { - debug!(name, "Installing data"); + debug!(?name, "Installing data"); install_data( layout, site_packages, @@ -124,10 +120,10 @@ pub fn install_wheel( // 2.e Remove empty distribution-1.0.data directory. fs::remove_dir_all(data_dir)?; } else { - debug!(name, "No data"); + debug!(?name, "No data"); } - debug!(name, "Writing extra metadata"); + debug!(?name, "Writing extra metadata"); extra_dist_info( site_packages, &dist_info_prefix, @@ -137,7 +133,7 @@ pub fn install_wheel( &mut record, )?; - debug!(name, "Writing record"); + debug!(?name, "Writing record"); let mut record_writer = csv::WriterBuilder::new() .has_headers(false) .escape(b'"') diff --git a/crates/install-wheel-rs/src/metadata.rs b/crates/install-wheel-rs/src/metadata.rs index d0334c2d5023..da3622b35a55 100644 --- a/crates/install-wheel-rs/src/metadata.rs +++ b/crates/install-wheel-rs/src/metadata.rs @@ -11,36 +11,6 @@ use uv_normalize::PackageName; use crate::Error; -/// Returns `true` if the file is a `METADATA` file in a `.dist-info` directory that matches the -/// wheel filename. -pub fn is_metadata_entry(path: &str, filename: &WheelFilename) -> bool { - let Some((dist_info_dir, file)) = path.split_once('/') else { - return false; - }; - if file != "METADATA" { - return false; - } - let Some(dir_stem) = dist_info_dir.strip_suffix(".dist-info") else { - return false; - }; - let Some((name, version)) = dir_stem.rsplit_once('-') else { - return false; - }; - let Ok(name) = PackageName::from_str(name) else { - return false; - }; - if name != filename.name { - return false; - } - let Ok(version) = Version::from_str(version) else { - return false; - }; - if version != filename.version { - return false; - } - true -} - /// Find the `.dist-info` directory in a zipped wheel. /// /// Returns the dist info dir prefix without the `.dist-info` extension. diff --git a/crates/install-wheel-rs/src/wheel.rs b/crates/install-wheel-rs/src/wheel.rs index f53bf8f6ec3f..a41f4f2fe041 100644 --- a/crates/install-wheel-rs/src/wheel.rs +++ b/crates/install-wheel-rs/src/wheel.rs @@ -6,7 +6,6 @@ use std::{env, io}; use data_encoding::BASE64URL_NOPAD; use fs_err as fs; use fs_err::{DirEntry, File}; -use mailparse::MailHeaderMap; use rustc_hash::FxHashMap; use sha2::{Digest, Sha256}; use tracing::{instrument, warn}; @@ -16,6 +15,7 @@ use zip::ZipWriter; use pypi_types::DirectUrl; use uv_fs::{relative_to, Simplified}; +use uv_normalize::PackageName; use crate::record::RecordEntry; use crate::script::Script; @@ -557,7 +557,7 @@ pub(crate) fn install_data( layout: &Layout, site_packages: &Path, data_dir: &Path, - dist_name: &str, + dist_name: &PackageName, console_scripts: &[Script], gui_scripts: &[Script], record: &mut [RecordEntry], @@ -602,7 +602,7 @@ pub(crate) fn install_data( } } Some("headers") => { - let target_path = layout.scheme.include.join(dist_name); + let target_path = layout.scheme.include.join(dist_name.as_str()); move_folder_recorded(&path, &target_path, site_packages, record)?; } Some("purelib") => { @@ -727,49 +727,6 @@ fn parse_key_value_file( Ok(data) } -/// Parse the distribution name and version from a wheel's `dist-info` metadata. -/// -/// See: -pub(crate) fn parse_metadata( - dist_info_prefix: &str, - content: &[u8], -) -> Result<(String, String), Error> { - // HACK: trick mailparse to parse as UTF-8 instead of ASCII - let mut mail = b"Content-Type: text/plain; charset=utf-8\n".to_vec(); - mail.extend_from_slice(content); - let msg = mailparse::parse_mail(&mail).map_err(|err| { - Error::InvalidWheel(format!( - "Invalid metadata in {dist_info_prefix}.dist-info/METADATA: {err}" - )) - })?; - let headers = msg.get_headers(); - let metadata_version = - headers - .get_first_value("Metadata-Version") - .ok_or(Error::InvalidWheel(format!( - "No `Metadata-Version` field in: {dist_info_prefix}.dist-info/METADATA" - )))?; - // Crude but it should do https://packaging.python.org/en/latest/specifications/core-metadata/#metadata-version - // At time of writing: - // > Version of the file format; legal values are “1.0”, “1.1”, “1.2”, “2.1”, “2.2”, and “2.3”. - if !(metadata_version.starts_with("1.") || metadata_version.starts_with("2.")) { - return Err(Error::InvalidWheel(format!( - "`Metadata-Version` field has unsupported value {metadata_version} in: {dist_info_prefix}.dist-info/METADATA" - ))); - } - let name = headers - .get_first_value("Name") - .ok_or(Error::InvalidWheel(format!( - "No `Name` field in: {dist_info_prefix}.dist-info/METADATA" - )))?; - let version = headers - .get_first_value("Version") - .ok_or(Error::InvalidWheel(format!( - "No `Version` field in: {dist_info_prefix}.dist-info/METADATA" - )))?; - Ok((name, version)) -} - #[cfg(test)] mod test { use std::io::Cursor; diff --git a/crates/pypi-types/src/metadata.rs b/crates/pypi-types/src/metadata.rs index 50a35f6f62f7..3aa24add303b 100644 --- a/crates/pypi-types/src/metadata.rs +++ b/crates/pypi-types/src/metadata.rs @@ -338,6 +338,65 @@ impl Metadata10 { } } +/// Python Package Metadata 1.2 and later as specified in +/// . +/// +/// This is a subset of the full metadata specification, and only includes the +/// fields that have been consistent across all versions of the specification later than 1.2. +#[derive(Deserialize, Debug, Clone)] +#[serde(rename_all = "kebab-case")] +pub struct Metadata12 { + pub name: PackageName, + pub version: Version, + pub requires_python: Option, +} + +impl Metadata12 { + /// Parse the [`Metadata12`] from a `.dist-info` `METADATA` file, as included in a built + /// distribution. + pub fn parse_metadata(content: &[u8]) -> Result { + let headers = Headers::parse(content)?; + + // To rely on a source distribution's `PKG-INFO` file, the `Metadata-Version` field must be + // present and set to a value of at least `2.2`. + let metadata_version = headers + .get_first_value("Metadata-Version") + .ok_or(MetadataError::FieldNotFound("Metadata-Version"))?; + + // Parse the version into (major, minor). + let (major, minor) = parse_version(&metadata_version)?; + + // At time of writing: + // > Version of the file format; legal values are “1.0”, “1.1”, “1.2”, “2.1”, “2.2”, and “2.3”. + if (major, minor) < (1, 0) || (major, minor) >= (3, 0) { + return Err(MetadataError::InvalidMetadataVersion(metadata_version)); + } + + let name = PackageName::new( + headers + .get_first_value("Name") + .ok_or(MetadataError::FieldNotFound("Name"))?, + )?; + let version = Version::from_str( + &headers + .get_first_value("Version") + .ok_or(MetadataError::FieldNotFound("Version"))?, + ) + .map_err(MetadataError::Pep440VersionError)?; + let requires_python = headers + .get_first_value("Requires-Python") + .map(|requires_python| LenientVersionSpecifiers::from_str(&requires_python)) + .transpose()? + .map(VersionSpecifiers::from); + + Ok(Self { + name, + version, + requires_python, + }) + } +} + /// Parse a `Metadata-Version` field into a (major, minor) tuple. fn parse_version(metadata_version: &str) -> Result<(u8, u8), MetadataError> { let (major, minor) = diff --git a/crates/uv/src/commands/pip/operations.rs b/crates/uv/src/commands/pip/operations.rs index 483aa5020661..5b57c600868d 100644 --- a/crates/uv/src/commands/pip/operations.rs +++ b/crates/uv/src/commands/pip/operations.rs @@ -2,7 +2,6 @@ use std::fmt::{self, Write}; use std::path::PathBuf; -use std::time::Instant; use anyhow::{anyhow, Context}; use itertools::Itertools; @@ -99,7 +98,7 @@ pub(crate) async fn resolve( preview: PreviewMode, quiet: bool, ) -> Result { - let start = Instant::now(); + let start = std::time::Instant::now(); // Resolve the requirements from the provided sources. let requirements = { @@ -261,7 +260,7 @@ pub(crate) async fn resolve( // Prints a success message after completing resolution. pub(crate) fn resolution_success( resolution: &ResolutionGraph, - start: Instant, + start: std::time::Instant, printer: Printer, ) -> fmt::Result { let s = if resolution.len() == 1 { "" } else { "s" };