Skip to content

Commit

Permalink
factors: Fix wagi support
Browse files Browse the repository at this point in the history
Signed-off-by: Lann Martin <[email protected]>
  • Loading branch information
lann committed Aug 23, 2024
1 parent 0759b92 commit 7289d2a
Show file tree
Hide file tree
Showing 9 changed files with 221 additions and 118 deletions.
87 changes: 37 additions & 50 deletions crates/componentize/src/bugs.rs
Original file line number Diff line number Diff line change
@@ -1,60 +1,42 @@
use anyhow::bail;
use wasm_metadata::Producers;
use wasmparser::{Encoding, ExternalKind, Parser, Payload};
use crate::module_info::ModuleInfo;

pub const EARLIEST_PROBABLY_SAFE_CLANG_VERSION: &str = "15.0.7";

/// Represents the detected likelihood of the allocation bug fixed in
/// https://github.com/WebAssembly/wasi-libc/pull/377 being present in a Wasm
/// module.
#[derive(Debug, PartialEq)]
pub enum WasiLibc377Bug {
ProbablySafe,
ProbablyUnsafe,
ProbablyUnsafe { clang_version: String },
Unknown,
}

impl WasiLibc377Bug {
pub fn detect(module: &[u8]) -> anyhow::Result<Self> {
for payload in Parser::new(0).parse_all(module) {
match payload? {
Payload::Version { encoding, .. } if encoding != Encoding::Module => {
bail!("detection only applicable to modules");
}
Payload::ExportSection(reader) => {
for export in reader {
let export = export?;
if export.kind == ExternalKind::Func && export.name == "cabi_realloc" {
// `cabi_realloc` is a good signal that this module
// uses wit-bindgen, making it probably-safe.
tracing::debug!("Found cabi_realloc export");
return Ok(Self::ProbablySafe);
}
}
}
Payload::CustomSection(c) if c.name() == "producers" => {
let producers = Producers::from_bytes(c.data(), c.data_offset())?;
if let Some(clang_version) =
producers.get("processed-by").and_then(|f| f.get("clang"))
{
tracing::debug!(clang_version, "Parsed producers.processed-by.clang");

// Clang/LLVM version is a good proxy for wasi-sdk
// version; the allocation bug was fixed in wasi-sdk-18
// and LLVM was updated to 15.0.7 in wasi-sdk-19.
if let Some((major, minor, patch)) = parse_clang_version(clang_version) {
return if (major, minor, patch) >= (15, 0, 7) {
Ok(Self::ProbablySafe)
} else {
Ok(Self::ProbablyUnsafe)
};
} else {
tracing::warn!(
clang_version,
"Unexpected producers.processed-by.clang version"
);
}
}
}
_ => (),
pub fn detect(module_info: &ModuleInfo) -> anyhow::Result<Self> {
if module_info.probably_uses_wit_bindgen() {
// Modules built with wit-bindgen are probably safe.
return Ok(Self::ProbablySafe);
}
if let Some(clang_version) = &module_info.clang_version {
// Clang/LLVM version is a good proxy for wasi-sdk
// version; the allocation bug was fixed in wasi-sdk-18
// and LLVM was updated to 15.0.7 in wasi-sdk-19.
if let Some((major, minor, patch)) = parse_clang_version(clang_version) {
let earliest_safe =
parse_clang_version(EARLIEST_PROBABLY_SAFE_CLANG_VERSION).unwrap();
return if (major, minor, patch) >= earliest_safe {
Ok(Self::ProbablySafe)
} else {
Ok(Self::ProbablyUnsafe {
clang_version: clang_version.clone(),
})
};
} else {
tracing::warn!(
clang_version,
"Unexpected producers.processed-by.clang version"
);
}
}
Ok(Self::Unknown)
Expand Down Expand Up @@ -98,11 +80,15 @@ mod tests {
),
(
r#"(module (@producers (processed-by "clang" "15.0.6")))"#,
ProbablyUnsafe,
ProbablyUnsafe {
clang_version: "15.0.6".into(),
},
),
(
r#"(module (@producers (processed-by "clang" "14.0.0")))"#,
ProbablyUnsafe,
r#"(module (@producers (processed-by "clang" "14.0.0 extra-stuff")))"#,
ProbablyUnsafe {
clang_version: "14.0.0 extra-stuff".into(),
},
),
(
r#"(module (@producers (processed-by "clang" "a.b.c")))"#,
Expand All @@ -111,7 +97,8 @@ mod tests {
] {
eprintln!("WAT: {wasm}");
let module = wat::parse_str(wasm).unwrap();
let detected = WasiLibc377Bug::detect(&module).unwrap();
let module_info = ModuleInfo::from_module(&module).unwrap();
let detected = WasiLibc377Bug::detect(&module_info).unwrap();
assert_eq!(detected, expected);
}
}
Expand Down
68 changes: 39 additions & 29 deletions crates/componentize/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
use {
anyhow::{anyhow, Context, Result},
convert::{IntoEntityType, IntoExportKind},
module_info::ModuleInfo,
std::{borrow::Cow, collections::HashSet},
wasm_encoder::{CustomSection, ExportSection, ImportSection, Module, RawSection},
wasmparser::{Encoding, Parser, Payload},
Expand All @@ -14,6 +15,7 @@ pub mod bugs;
#[cfg(test)]
mod abi_conformance;
mod convert;
mod module_info;

const SPIN_ADAPTER: &[u8] = include_bytes!(concat!(
env!("OUT_DIR"),
Expand Down Expand Up @@ -51,8 +53,9 @@ pub fn componentize_if_necessary(module_or_component: &[u8]) -> Result<Cow<[u8]>
}

pub fn componentize(module: &[u8]) -> Result<Vec<u8>> {
match WitBindgenVersion::from_module(module)? {
WitBindgenVersion::V0_2 => componentize_old_bindgen(module),
let module_info = ModuleInfo::from_module(module)?;
match WitBindgenVersion::detect(&module_info)? {
WitBindgenVersion::V0_2OrNone => componentize_old_module(module, &module_info),
WitBindgenVersion::GreaterThanV0_4 => componentize_new_bindgen(module),
WitBindgenVersion::Other(other) => Err(anyhow::anyhow!(
"cannot adapt modules created with wit-bindgen version {other}"
Expand All @@ -65,40 +68,36 @@ pub fn componentize(module: &[u8]) -> Result<Vec<u8>> {
#[derive(Debug)]
enum WitBindgenVersion {
GreaterThanV0_4,
V0_2,
V0_2OrNone,
Other(String),
}

impl WitBindgenVersion {
fn from_module(module: &[u8]) -> Result<Self> {
let (_, bindgen) = metadata::decode(module)?;
if let Some(producers) = bindgen.producers {
if let Some(processors) = producers.get("processed-by") {
let bindgen_version = processors.iter().find_map(|(key, value)| {
key.starts_with("wit-bindgen").then_some(value.as_str())
});
if let Some(v) = bindgen_version {
let mut parts = v.split('.');
let Some(major) = parts.next().and_then(|p| p.parse::<u8>().ok()) else {
return Ok(Self::Other(v.to_owned()));
};
let Some(minor) = parts.next().and_then(|p| p.parse::<u8>().ok()) else {
return Ok(Self::Other(v.to_owned()));
};
if (major == 0 && minor < 5) || major >= 1 {
return Ok(Self::Other(v.to_owned()));
}
// Either there should be no patch version or nothing after patch
if parts.next().is_none() || parts.next().is_none() {
return Ok(Self::GreaterThanV0_4);
} else {
return Ok(Self::Other(v.to_owned()));
}
fn detect(module_info: &ModuleInfo) -> Result<Self> {
if let Some(processors) = module_info.bindgen_processors() {
let bindgen_version = processors
.iter()
.find_map(|(key, value)| key.starts_with("wit-bindgen").then_some(value.as_str()));
if let Some(v) = bindgen_version {
let mut parts = v.split('.');
let Some(major) = parts.next().and_then(|p| p.parse::<u8>().ok()) else {
return Ok(Self::Other(v.to_owned()));
};
let Some(minor) = parts.next().and_then(|p| p.parse::<u8>().ok()) else {
return Ok(Self::Other(v.to_owned()));
};
if (major == 0 && minor < 5) || major >= 1 {
return Ok(Self::Other(v.to_owned()));
}
// Either there should be no patch version or nothing after patch
if parts.next().is_none() || parts.next().is_none() {
return Ok(Self::GreaterThanV0_4);
} else {
return Ok(Self::Other(v.to_owned()));
}
}
}

Ok(Self::V0_2)
Ok(Self::V0_2OrNone)
}
}

Expand All @@ -111,6 +110,17 @@ pub fn componentize_new_bindgen(module: &[u8]) -> Result<Vec<u8>> {
.encode()
}

/// Componentizes a module that was *not* produced with wit-bindgen >= 0.5,
/// which means it was built either with an old wit-bindgen or with no
/// wit-bindgen at all.
///
/// `module` holds the raw module bytes and `module_info` the details
/// previously parsed from those same bytes. The result is whatever the
/// selected adaptation routine returns.
pub fn componentize_old_module(module: &[u8], module_info: &ModuleInfo) -> Result<Vec<u8>> {
    // Without a `_start` export, or with any hint of wit-bindgen, treat the
    // module as old-bindgen output.
    if !module_info.has_start_export || module_info.probably_uses_wit_bindgen() {
        return componentize_old_bindgen(module);
    }
    // A `_start` export and no obvious wit-bindgen usage: this is likely an
    // old p1 command module.
    componentize_command(module)
}

/// Modules produced with wit-bindgen 0.2 need more extensive adaption
pub fn componentize_old_bindgen(module: &[u8]) -> Result<Vec<u8>> {
let (module, exports) = retarget_imports_and_get_exports(ADAPTER_NAME, module)?;
Expand Down
111 changes: 111 additions & 0 deletions crates/componentize/src/module_info.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
use wasm_metadata::Producers;
use wasmparser::{Encoding, ExternalKind, Parser, Payload};
use wit_component::metadata::Bindgen;

// wit-bindgen has used both of these historically.
const CANONICAL_ABI_REALLOC_EXPORTS: &[&str] = &["cabi_realloc", "canonical_abi_realloc"];

/// Stores various bits of info parsed from a Wasm module that are relevant to
/// componentization.
///
/// All fields start out empty/false (via `Default`) and are filled in by
/// `ModuleInfo::from_module` as it encounters the corresponding sections.
#[derive(Default)]
pub struct ModuleInfo {
/// Bindgen metadata decoded from a custom section whose name starts with
/// `component-type`; `None` if no such section was decoded successfully.
pub bindgen: Option<Bindgen>,
/// The `clang` entry of the `processed-by` field in the `producers` custom
/// section, if present.
pub clang_version: Option<String>,
/// Name of an exported function matching one of the canonical ABI realloc
/// names (`cabi_realloc` or `canonical_abi_realloc`), if any was found.
pub realloc_export: Option<String>,
/// Whether the module exports a function named `_start`.
pub has_start_export: bool,
}

impl ModuleInfo {
    /// Walks the payloads of the given binary module and records the details
    /// that matter for componentization.
    ///
    /// # Errors
    ///
    /// Fails if a payload or export entry cannot be parsed, if the binary's
    /// encoding is not `Encoding::Module`, or if the `producers` custom
    /// section cannot be decoded. A `component-type*` section that fails to
    /// decode is only logged as a warning and otherwise ignored.
    pub fn from_module(module: &[u8]) -> anyhow::Result<Self> {
        let mut found = Self::default();
        for item in Parser::new(0).parse_all(module) {
            match item? {
                Payload::Version { encoding, .. } => {
                    anyhow::ensure!(
                        encoding == Encoding::Module,
                        "ModuleInfo::from_module is only applicable to Modules; got a {encoding:?}"
                    );
                }
                Payload::ExportSection(exports) => {
                    for entry in exports {
                        let entry = entry?;
                        // Only function exports carry any signal for us.
                        if entry.kind != ExternalKind::Func {
                            continue;
                        }
                        if entry.name == "_start" {
                            tracing::debug!("Found _start export");
                            found.has_start_export = true;
                        } else if CANONICAL_ABI_REALLOC_EXPORTS.contains(&entry.name) {
                            tracing::debug!(
                                "Found canonical ABI realloc export {:?}",
                                entry.name
                            );
                            found.realloc_export = Some(entry.name.to_string());
                        }
                    }
                }
                Payload::CustomSection(custom) => match custom.name() {
                    "producers" => {
                        let producers =
                            Producers::from_bytes(custom.data(), custom.data_offset())?;
                        if let Some(clang_version) = producers
                            .get("processed-by")
                            .and_then(|field| field.get("clang"))
                        {
                            tracing::debug!(clang_version, "Parsed producers.processed-by.clang");
                            found.clang_version = Some(clang_version.to_string());
                        }
                    }
                    section_name if section_name.starts_with("component-type") => {
                        // A broken bindgen section is not fatal: log it and
                        // carry on with the remaining payloads.
                        match decode_bindgen_custom_section(section_name, custom.data()) {
                            Ok(bindgen) => {
                                tracing::debug!("Parsed bindgen section {section_name:?}");
                                found.bindgen = Some(bindgen);
                            }
                            Err(err) => tracing::warn!(
                                "Error parsing bindgen section {section_name:?}: {err}"
                            ),
                        }
                    }
                    _ => (),
                },
                _ => (),
            }
        }
        Ok(found)
    }

    /// Heuristic: reports whether this module was probably compiled with
    /// wit-bindgen.
    pub fn probably_uses_wit_bindgen(&self) -> bool {
        // Bindgen metadata is a strong signal; a canonical ABI realloc export
        // is a decent one. Either is enough.
        self.bindgen.is_some() || self.realloc_export.is_some()
    }

    /// Looks up the `processed-by` field of the producers recorded in the
    /// wit-bindgen metadata, yielding `None` when the metadata, its
    /// producers, or that field is absent.
    pub fn bindgen_processors(&self) -> Option<wasm_metadata::ProducersField> {
        let bindgen = self.bindgen.as_ref()?;
        let producers = bindgen.producers.as_ref()?;
        producers.get("processed-by")
    }
}

/// Decodes a single bindgen custom section given its name and raw contents.
///
/// [`wit_component::metadata`] only exposes a decoder that takes a whole
/// module, so as a (silly) workaround the section is wrapped in an otherwise
/// empty module which is then decoded.
// TODO: Make Bindgen::decode_custom_section public?
fn decode_bindgen_custom_section(name: &str, data: &[u8]) -> anyhow::Result<Bindgen> {
    let section = wasm_encoder::CustomSection {
        name: name.into(),
        data: data.into(),
    };
    let mut wrapper = wasm_encoder::Module::new();
    wrapper.section(&section);
    // The first tuple element is not needed here; only the metadata is.
    let (_, bindgen) = wit_component::metadata::decode(wrapper.as_slice())?;
    Ok(bindgen)
}
6 changes: 0 additions & 6 deletions crates/http/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,6 @@ pub enum HttpExecutorType {
#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(default, deny_unknown_fields)]
pub struct WagiTriggerConfig {
/// The name of the entrypoint.
pub entrypoint: String,

/// A string representation of the argv array.
///
/// This should be a space-separated list of strings. The value
Expand All @@ -81,11 +78,9 @@ pub struct WagiTriggerConfig {
impl Default for WagiTriggerConfig {
fn default() -> Self {
/// This is the default Wagi argv string.
const WAGI_DEFAULT_ENTRYPOINT: &str = "_start";
const WAGI_DEFAULT_ARGV: &str = "${SCRIPT_NAME} ${ARGS}";

Self {
entrypoint: WAGI_DEFAULT_ENTRYPOINT.to_owned(),
argv: WAGI_DEFAULT_ARGV.to_owned(),
}
}
Expand All @@ -101,7 +96,6 @@ mod tests {
else {
panic!("wrong type");
};
assert_eq!(config.entrypoint, "_start");
assert_eq!(config.argv, "${SCRIPT_NAME} ${ARGS}");
}
}
Loading

0 comments on commit 7289d2a

Please sign in to comment.