From 0999d435501628f4dca01ea37dbe25527cc9f8c3 Mon Sep 17 00:00:00 2001 From: Jamey Sharp Date: Fri, 7 Jun 2024 09:44:10 -0700 Subject: [PATCH] wasmtime: Annotate emit-clif output with source line numbers When we're compiling a WebAssembly module that contains a DWARF `.debug_line` section, this commit adds comments to the output of `wasmtime compile --emit-clif` indicating which file/line/column each block of CLIF instructions originated from. This is useful when trying to understand why we're generating the code we do when there's a lot of WebAssembly in a single function. That can happen either because there's a lot of source code in that function, or because the toolchain (e.g. LLVM) inlined a lot of other functions into it before generating WebAssembly. --- crates/cranelift/src/compiler.rs | 55 +++++- crates/environ/src/compile/module_environ.rs | 181 +++++++++++++++++++ 2 files changed, 229 insertions(+), 7 deletions(-) diff --git a/crates/cranelift/src/compiler.rs b/crates/cranelift/src/compiler.rs index 4262f5f8730a..3d33a58d923d 100644 --- a/crates/cranelift/src/compiler.rs +++ b/crates/cranelift/src/compiler.rs @@ -10,8 +10,8 @@ use cranelift_codegen::isa::{ OwnedTargetIsa, TargetIsa, }; use cranelift_codegen::print_errors::pretty_error; -use cranelift_codegen::Context; -use cranelift_codegen::{CompiledCode, MachStackMap}; +use cranelift_codegen::write::{decorate_function, FuncWriter, PlainWriter}; +use cranelift_codegen::{CompiledCode, Context, MachStackMap}; use cranelift_entity::{EntityRef, PrimaryMap}; use cranelift_frontend::FunctionBuilder; use cranelift_wasm::{ @@ -28,7 +28,7 @@ use std::sync::{Arc, Mutex}; use wasmparser::{FuncValidatorAllocations, FunctionBody}; use wasmtime_environ::{ AddressMapSection, BuiltinFunctionIndex, CacheStore, CompileError, FlagValue, FunctionBodyData, - FunctionLoc, LineContext, ModuleTranslation, ModuleTypesBuilder, PtrSize, 
RelocationTarget, StackMapInformation, TrapEncodingBuilder, Tunables, VMOffsets, WasmFunctionInfo, }; @@ -224,14 +224,21 @@ impl wasmtime_environ::Compiler for Compiler { )?; if let Some(path) = &self.clif_dir { - use std::io::Write; - let mut path = path.to_path_buf(); path.push(format!("wasm_func_{}", func_index.as_u32())); path.set_extension("clif"); - let mut output = std::fs::File::create(path).unwrap(); - write!(output, "{}", context.func.display()).unwrap(); + let mut contents = String::new(); + decorate_function( + &mut LineNumberWriter { + context: LineContext::new(&translation.debuginfo), + base: PlainWriter, + }, + &mut contents, + &context.func, + ) + .unwrap(); + std::fs::write(path, contents).unwrap(); } let (info, func) = compiler.finish_with_info(Some((&body, &self.tunables)))?; @@ -919,6 +926,40 @@ impl FunctionCompiler<'_> { } } +struct LineNumberWriter<'a> { + context: LineContext<'a>, + base: PlainWriter, +} + +impl FuncWriter for LineNumberWriter<'_> { + fn write_block_header( + &mut self, + w: &mut dyn std::fmt::Write, + func: &ir::Function, + block: ir::Block, + indent: usize, + ) -> core::fmt::Result { + self.base.write_block_header(w, func, block, indent) + } + + fn write_instruction( + &mut self, + w: &mut dyn std::fmt::Write, + func: &ir::Function, + aliases: &cranelift_entity::SecondaryMap>, + inst: ir::Inst, + indent: usize, + ) -> core::fmt::Result { + let srcloc = func.srcloc(inst); + if !srcloc.is_default() { + if let Some(line) = self.context.lookup(srcloc.bits().into()) { + writeln!(w, ";; {line}")?; + } + } + self.base.write_instruction(w, func, aliases, inst, indent) + } +} + fn mach_stack_maps_to_stack_maps(mach_stack_maps: &[MachStackMap]) -> Vec { // This is converting from Cranelift's representation of a stack map to // Wasmtime's representation. 
They happen to align today but that may diff --git a/crates/environ/src/compile/module_environ.rs b/crates/environ/src/compile/module_environ.rs index 111e7f094970..adcce99f3b66 100644 --- a/crates/environ/src/compile/module_environ.rs +++ b/crates/environ/src/compile/module_environ.rs @@ -10,6 +10,10 @@ use crate::{ WasmResult, WasmValType, WasmparserTypeConverter, }; use anyhow::{bail, Result}; +use core::cmp::Ordering; +use core::mem::replace; +use core::num::NonZeroU64; +use core::ops::Range; use cranelift_entity::packed_option::ReservedValue; use cranelift_entity::EntityRef; use std::borrow::Cow; @@ -124,6 +128,8 @@ pub struct FunctionBodyData<'a> { #[allow(missing_docs)] pub struct DebugInfoData<'a> { pub dwarf: Dwarf<'a>, + pub line_units: Vec>, + pub line_sequences: Vec>, pub name_section: NameSection<'a>, pub wasm_file: WasmFileInfo, pub debug_loc: gimli::DebugLoc>, @@ -139,6 +145,20 @@ pub type Dwarf<'input> = gimli::Dwarf>; type Reader<'input> = gimli::EndianSlice<'input, gimli::LittleEndian>; +#[derive(Debug)] +#[allow(missing_docs)] +pub struct LineUnit<'input> { + pub unit_offset: gimli::UnitSectionOffset, + pub line_program: gimli::CompleteLineProgram, usize>, +} + +#[derive(Debug)] +#[allow(missing_docs)] +pub struct LineSequence<'input> { + pub sequence: gimli::LineSequence>, + pub unit: usize, +} + #[derive(Debug, Default)] #[allow(missing_docs)] pub struct NameSection<'a> { @@ -163,6 +183,122 @@ pub struct FunctionMetadata { pub locals: Box<[(u32, WasmValType)]>, } +#[allow(missing_docs)] +pub struct LineContext<'a> { + debuginfo: &'a DebugInfoData<'a>, + sequence: Range, + current: Range, + last_file: u64, + next_file: u64, + next_line: Option, + next_column: gimli::ColumnType, + upcoming: Option< + gimli::LineRows, &'a gimli::CompleteLineProgram, usize>, usize>, + >, +} + +#[allow(missing_docs)] +impl<'a> LineContext<'a> { + pub fn new(debuginfo: &'a DebugInfoData<'a>) -> Self { + LineContext { + debuginfo, + sequence: 0..0, + current: 0..0, + 
last_file: u64::MAX, + next_file: 0, + next_line: None, + next_column: gimli::ColumnType::LeftEdge, + upcoming: None, + } + } + + pub fn lookup(&mut self, address: u64) -> Option { + let address = address - self.debuginfo.wasm_file.code_section_offset; + + if self.current.contains(&address) { + return None; + } + + if !self.sequence.contains(&address) { + let sequence_idx = self + .debuginfo + .line_sequences + .binary_search_by(|v| { + if address < v.sequence.start { + Ordering::Greater + } else if v.sequence.end <= address { + Ordering::Less + } else { + Ordering::Equal + } + }) + .ok()?; + + let sequence = &self.debuginfo.line_sequences[sequence_idx]; + let unit = &self.debuginfo.line_units[sequence.unit]; + self.sequence = sequence.sequence.start..sequence.sequence.end; + debug_assert!( + self.sequence.contains(&address), + "{:?} should contain {}", + self.sequence, + address + ); + + let mut rows = unit.line_program.resume_from(&sequence.sequence); + let (_header, first_row) = rows.next_row().unwrap().unwrap(); + self.current = 0..first_row.address(); + debug_assert_eq!(self.current.end, self.sequence.start); + debug_assert!( + !self.current.contains(&address), + "{:?} should not contain {}", + self.current, + address + ); + self.next_file = first_row.file_index(); + self.next_line = first_row.line(); + self.next_column = first_row.column(); + self.upcoming = Some(rows); + } + + let rows = self.upcoming.as_mut()?; + + let mut file = 0; + let mut line = None; + let mut column = gimli::ColumnType::LeftEdge; + while !self.current.contains(&address) { + let (_header, next_row) = rows.next_row().unwrap().unwrap(); + self.current = self.current.end..next_row.address(); + file = replace(&mut self.next_file, next_row.file_index()); + line = replace(&mut self.next_line, next_row.line()); + column = replace(&mut self.next_column, next_row.column()); + } + + let line = line.map_or(0, NonZeroU64::get); + let column = match column { + gimli::ColumnType::LeftEdge => 0, + 
gimli::ColumnType::Column(c) => c.get(), + }; + + if self.last_file != file { + self.last_file = file; + if let Some(file) = rows.header().file(file) { + let mut path = PathBuf::new(); + if let Some(gimli::AttributeValue::String(dir)) = file.directory(rows.header()) { + path.push(dir.to_string_lossy().as_ref()); + } + if let gimli::AttributeValue::String(file) = file.path_name() { + path.push(file.to_string_lossy().as_ref()); + } + Some(format!("{}:{line}:{column}", path.display())) + } else { + Some(format!(":{line}:{column}")) + } + } else { + Some(format!("{line}:{column}")) + } + } +} + impl<'a, 'data> ModuleEnvironment<'a, 'data> { /// Allocates the environment data structures. pub fn new( @@ -197,9 +333,54 @@ impl<'a, 'data> ModuleEnvironment<'a, 'data> { self.translate_payload(payload?)?; } + if self.tunables.parse_wasm_debuginfo { + self.extract_line_program()?; + } + Ok(self.result) } + fn extract_line_program(&mut self) -> Result<()> { + let dwarf = &self.result.debuginfo.dwarf; + let mut completed = Vec::new(); + let mut all_sequences = Vec::new(); + let mut iter = dwarf.units(); + while let Some(header) = iter.next()? { + let unit = dwarf.unit(header)?; + if let Some(program) = unit.line_program.clone() { + let (program, sequences) = program.sequences()?; + let unit = completed.len(); + completed.push(LineUnit { + unit_offset: header.offset(), + line_program: program, + }); + all_sequences.extend( + sequences + .into_iter() + // FIXME: make gimli stop returning nonsense sequences + .filter(|sequence| sequence.start != 0) + .map(|sequence| LineSequence { sequence, unit }), + ); + } + } + + // Within each sequence DWARF specifies that the instruction + // addresses are in ascending order, but there are no guarantees + // across different sequences, which could even be interleaved + // across different compilation units. We sort the sequences so + // we can look them up easily as needed. 
+ all_sequences.sort_unstable_by_key(|v| v.sequence.start); + + // We expect that no sequence overlaps with any other. + debug_assert!(all_sequences + .windows(2) + .all(|w| w[0].sequence.end <= w[1].sequence.start)); + + self.result.debuginfo.line_units = completed; + self.result.debuginfo.line_sequences = all_sequences; + Ok(()) + } + fn translate_payload(&mut self, payload: Payload<'data>) -> Result<()> { match payload { Payload::Version {