Skip to content

Commit

Permalink
wasmtime: Annotate emit-clif output with source line numbers
Browse files Browse the repository at this point in the history
When we're compiling a WebAssembly module that contains a DWARF
`.debug_line` section, this commit adds comments to the output of
`wasmtime compile --emit-clif` indicating which file/line/column each
block of CLIF instructions originated from.

This is useful when trying to understand why we're generating the code
we do when there's a lot of WebAssembly in a single function. That can
happen either because there's a lot of source code in that function, or
because the toolchain (e.g. LLVM) inlined a lot of other functions into
it before generating WebAssembly.
  • Loading branch information
jameysharp committed Jun 11, 2024
1 parent e852bce commit 0999d43
Show file tree
Hide file tree
Showing 2 changed files with 229 additions and 7 deletions.
55 changes: 48 additions & 7 deletions crates/cranelift/src/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ use cranelift_codegen::isa::{
OwnedTargetIsa, TargetIsa,
};
use cranelift_codegen::print_errors::pretty_error;
use cranelift_codegen::Context;
use cranelift_codegen::{CompiledCode, MachStackMap};
use cranelift_codegen::write::{decorate_function, FuncWriter, PlainWriter};
use cranelift_codegen::{CompiledCode, Context, MachStackMap};
use cranelift_entity::{EntityRef, PrimaryMap};
use cranelift_frontend::FunctionBuilder;
use cranelift_wasm::{
Expand All @@ -28,7 +28,7 @@ use std::sync::{Arc, Mutex};
use wasmparser::{FuncValidatorAllocations, FunctionBody};
use wasmtime_environ::{
AddressMapSection, BuiltinFunctionIndex, CacheStore, CompileError, FlagValue, FunctionBodyData,
FunctionLoc, ModuleTranslation, ModuleTypesBuilder, PtrSize, RelocationTarget,
FunctionLoc, LineContext, ModuleTranslation, ModuleTypesBuilder, PtrSize, RelocationTarget,
StackMapInformation, TrapEncodingBuilder, Tunables, VMOffsets, WasmFunctionInfo,
};

Expand Down Expand Up @@ -224,14 +224,21 @@ impl wasmtime_environ::Compiler for Compiler {
)?;

if let Some(path) = &self.clif_dir {
use std::io::Write;

let mut path = path.to_path_buf();
path.push(format!("wasm_func_{}", func_index.as_u32()));
path.set_extension("clif");

let mut output = std::fs::File::create(path).unwrap();
write!(output, "{}", context.func.display()).unwrap();
let mut contents = String::new();
decorate_function(
&mut LineNumberWriter {
context: LineContext::new(&translation.debuginfo),
base: PlainWriter,
},
&mut contents,
&context.func,
)
.unwrap();
std::fs::write(path, contents).unwrap();
}

let (info, func) = compiler.finish_with_info(Some((&body, &self.tunables)))?;
Expand Down Expand Up @@ -919,6 +926,40 @@ impl FunctionCompiler<'_> {
}
}

/// A `FuncWriter` that emits a `;; <location>` comment line ahead of an
/// instruction whenever `context` reports a source location for it, and
/// otherwise forwards all printing to `base`.
struct LineNumberWriter<'a> {
/// Stateful source-location lookup, queried with each instruction's
/// non-default `srcloc`.
context: LineContext<'a>,
/// Writer that block headers and instructions are delegated to.
base: PlainWriter,
}

impl FuncWriter for LineNumberWriter<'_> {
    /// Prints a block header exactly as the wrapped `base` writer would; no
    /// source annotation is attached to block headers.
    fn write_block_header(
        &mut self,
        w: &mut dyn std::fmt::Write,
        func: &ir::Function,
        block: ir::Block,
        indent: usize,
    ) -> core::fmt::Result {
        self.base.write_block_header(w, func, block, indent)
    }

    /// Prints one instruction via the wrapped `base` writer, preceded by a
    /// `;; <location>` comment line when the instruction has a non-default
    /// source location and the line context yields text for it.
    fn write_instruction(
        &mut self,
        w: &mut dyn std::fmt::Write,
        func: &ir::Function,
        aliases: &cranelift_entity::SecondaryMap<Value, Vec<Value>>,
        inst: ir::Inst,
        indent: usize,
    ) -> core::fmt::Result {
        let srcloc = func.srcloc(inst);

        // Instructions without a source location never consult the context,
        // so its cursor is left untouched for them.
        let annotation = if srcloc.is_default() {
            None
        } else {
            self.context.lookup(srcloc.bits().into())
        };

        if let Some(line) = annotation {
            writeln!(w, ";; {line}")?;
        }

        self.base.write_instruction(w, func, aliases, inst, indent)
    }
}

fn mach_stack_maps_to_stack_maps(mach_stack_maps: &[MachStackMap]) -> Vec<StackMapInformation> {
// This is converting from Cranelift's representation of a stack map to
// Wasmtime's representation. They happen to align today but that may
Expand Down
181 changes: 181 additions & 0 deletions crates/environ/src/compile/module_environ.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ use crate::{
WasmResult, WasmValType, WasmparserTypeConverter,
};
use anyhow::{bail, Result};
use core::cmp::Ordering;
use core::mem::replace;
use core::num::NonZeroU64;
use core::ops::Range;
use cranelift_entity::packed_option::ReservedValue;
use cranelift_entity::EntityRef;
use std::borrow::Cow;
Expand Down Expand Up @@ -124,6 +128,8 @@ pub struct FunctionBodyData<'a> {
#[allow(missing_docs)]
pub struct DebugInfoData<'a> {
pub dwarf: Dwarf<'a>,
pub line_units: Vec<LineUnit<'a>>,
pub line_sequences: Vec<LineSequence<'a>>,
pub name_section: NameSection<'a>,
pub wasm_file: WasmFileInfo,
pub debug_loc: gimli::DebugLoc<Reader<'a>>,
Expand All @@ -139,6 +145,20 @@ pub type Dwarf<'input> = gimli::Dwarf<Reader<'input>>;

type Reader<'input> = gimli::EndianSlice<'input, gimli::LittleEndian>;

/// The line-number program of one DWARF compilation unit, recorded by
/// `extract_line_program` for every unit that has a line program.
#[derive(Debug)]
#[allow(missing_docs)]
pub struct LineUnit<'input> {
/// Section offset of the unit, taken from the unit header's `offset()`.
pub unit_offset: gimli::UnitSectionOffset,
/// The completed line program returned by `sequences()`; `LineContext`
/// resumes it at the start of a chosen sequence.
pub line_program: gimli::CompleteLineProgram<Reader<'input>, usize>,
}

/// One line-number sequence together with the unit it was read from.
#[derive(Debug)]
#[allow(missing_docs)]
pub struct LineSequence<'input> {
/// The gimli sequence; its `start..end` address range is what
/// `LineContext::lookup` searches by.
pub sequence: gimli::LineSequence<Reader<'input>>,
/// Index into `DebugInfoData::line_units` of the unit owning this sequence.
pub unit: usize,
}

#[derive(Debug, Default)]
#[allow(missing_docs)]
pub struct NameSection<'a> {
Expand All @@ -163,6 +183,122 @@ pub struct FunctionMetadata {
pub locals: Box<[(u32, WasmValType)]>,
}

/// Cursor over the line-number tables in a module's debug info, used by
/// `lookup` to turn addresses into `file:line:column` strings while
/// remembering what was reported last.
#[allow(missing_docs)]
pub struct LineContext<'a> {
/// Debug info whose `line_sequences` and `line_units` are searched.
debuginfo: &'a DebugInfoData<'a>,
/// Address range of the sequence currently being read (`0..0` before the
/// first lookup). Addresses are relative to the code section start, since
/// `lookup` subtracts `code_section_offset` before comparing.
sequence: Range<u64>,
/// Address range between the two most recently read rows; a lookup that
/// falls inside it returns `None` because nothing new needs reporting.
current: Range<u64>,
/// File index of the last location that was printed with a path;
/// starts at `u64::MAX` so the first reported location includes the path.
last_file: u64,
/// File index of the most recently read row, i.e. the file that applies
/// to addresses starting at `current.end`.
next_file: u64,
/// Line of the most recently read row (`None` when the row has no line).
next_line: Option<NonZeroU64>,
/// Column of the most recently read row.
next_column: gimli::ColumnType,
/// Row iterator positioned just past the most recently read row;
/// `None` until a sequence has been selected.
upcoming: Option<
gimli::LineRows<Reader<'a>, &'a gimli::CompleteLineProgram<Reader<'a>, usize>, usize>,
>,
}

impl<'a> LineContext<'a> {
    /// Creates a lookup cursor over `debuginfo` with no sequence selected.
    ///
    /// `last_file` starts at `u64::MAX` so that the first location returned
    /// by [`LineContext::lookup`] always includes the file path.
    pub fn new(debuginfo: &'a DebugInfoData<'a>) -> Self {
        LineContext {
            debuginfo,
            sequence: 0..0,
            current: 0..0,
            last_file: u64::MAX,
            next_file: 0,
            next_line: None,
            next_column: gimli::ColumnType::LeftEdge,
            upcoming: None,
        }
    }

    /// Returns the source location to report for `address`, if any.
    ///
    /// `address` is an offset in the wasm file; the code section offset is
    /// subtracted before it is compared against the line tables.
    ///
    /// Returns `None` when:
    /// * `address` lies before the code section,
    /// * `address` falls in the same row range as the previous lookup
    ///   (nothing new to report),
    /// * no line sequence covers `address`, or
    /// * the line program is malformed or ends before reaching `address`.
    ///
    /// Otherwise returns `path:line:column` when the file differs from the
    /// last one reported (`<unknown>:line:column` if the file entry is
    /// missing), or just `line:column` when the file is unchanged. A missing
    /// line or a left-edge column is printed as `0`.
    pub fn lookup(&mut self, address: u64) -> Option<String> {
        // Addresses below the code section cannot appear in the line tables;
        // a plain subtraction would overflow (and panic in debug builds).
        let address = address.checked_sub(self.debuginfo.wasm_file.code_section_offset)?;

        if self.current.contains(&address) {
            return None;
        }

        // Rows can only be read forwards. Start (or restart) a sequence when
        // the address is outside the selected sequence, or when it is inside
        // it but behind the cursor; stepping forward in the latter case would
        // run off the end of the rows without ever reaching the address.
        if !self.sequence.contains(&address) || address < self.current.start {
            let sequence_idx = self
                .debuginfo
                .line_sequences
                .binary_search_by(|v| {
                    if address < v.sequence.start {
                        Ordering::Greater
                    } else if v.sequence.end <= address {
                        Ordering::Less
                    } else {
                        Ordering::Equal
                    }
                })
                .ok()?;

            let sequence = &self.debuginfo.line_sequences[sequence_idx];
            let unit = &self.debuginfo.line_units[sequence.unit];

            // Read the first row before touching any cursor state, so that a
            // malformed program leaves `self` consistent when we bail out.
            let mut rows = unit.line_program.resume_from(&sequence.sequence);
            let (first_address, first_file, first_line, first_column) = {
                let (_header, first_row) = rows.next_row().ok()??;
                (
                    first_row.address(),
                    first_row.file_index(),
                    first_row.line(),
                    first_row.column(),
                )
            };

            self.sequence = sequence.sequence.start..sequence.sequence.end;
            debug_assert!(
                self.sequence.contains(&address),
                "{:?} should contain {}",
                self.sequence,
                address
            );

            self.current = 0..first_address;
            debug_assert_eq!(self.current.end, self.sequence.start);
            debug_assert!(
                !self.current.contains(&address),
                "{:?} should not contain {}",
                self.current,
                address
            );
            self.next_file = first_file;
            self.next_line = first_line;
            self.next_column = first_column;
            self.upcoming = Some(rows);
        }

        let rows = self.upcoming.as_mut()?;

        // Step forward until the range between two consecutive rows covers
        // the address. The location for that range is the one carried by the
        // earlier row, which is what the `next_*` fields hold before each
        // `replace`.
        let mut file = 0;
        let mut line = None;
        let mut column = gimli::ColumnType::LeftEdge;
        while !self.current.contains(&address) {
            let (row_address, row_file, row_line, row_column) = match rows.next_row() {
                Ok(Some((_header, row))) => {
                    (row.address(), row.file_index(), row.line(), row.column())
                }
                // Malformed or truncated line program: forget the cursor so
                // the next lookup starts over, and report no location.
                Ok(None) | Err(_) => {
                    self.sequence = 0..0;
                    self.current = 0..0;
                    return None;
                }
            };
            self.current = self.current.end..row_address;
            file = replace(&mut self.next_file, row_file);
            line = replace(&mut self.next_line, row_line);
            column = replace(&mut self.next_column, row_column);
        }

        let line = line.map_or(0, NonZeroU64::get);
        let column = match column {
            gimli::ColumnType::LeftEdge => 0,
            gimli::ColumnType::Column(c) => c.get(),
        };

        // Only spell out the path when the file changes, to keep the
        // annotations short.
        if self.last_file != file {
            self.last_file = file;
            if let Some(file) = rows.header().file(file) {
                let mut path = PathBuf::new();
                if let Some(gimli::AttributeValue::String(dir)) = file.directory(rows.header()) {
                    path.push(dir.to_string_lossy().as_ref());
                }
                if let gimli::AttributeValue::String(file) = file.path_name() {
                    path.push(file.to_string_lossy().as_ref());
                }
                Some(format!("{}:{line}:{column}", path.display()))
            } else {
                Some(format!("<unknown>:{line}:{column}"))
            }
        } else {
            Some(format!("{line}:{column}"))
        }
    }
}

impl<'a, 'data> ModuleEnvironment<'a, 'data> {
/// Allocates the environment data structures.
pub fn new(
Expand Down Expand Up @@ -197,9 +333,54 @@ impl<'a, 'data> ModuleEnvironment<'a, 'data> {
self.translate_payload(payload?)?;
}

if self.tunables.parse_wasm_debuginfo {
self.extract_line_program()?;
}

Ok(self.result)
}

/// Collects the line program of every DWARF unit that has one, together
/// with all of their line sequences sorted by start address, and stores
/// both in `self.result.debuginfo`.
fn extract_line_program(&mut self) -> Result<()> {
    let dwarf = &self.result.debuginfo.dwarf;
    let mut line_units = Vec::new();
    let mut line_sequences = Vec::new();

    let mut headers = dwarf.units();
    while let Some(header) = headers.next()? {
        // Units without a line program contribute nothing.
        let program = match dwarf.unit(header)?.line_program.clone() {
            Some(program) => program,
            None => continue,
        };

        let (line_program, sequences) = program.sequences()?;

        // Index this unit will occupy in `line_units`; each of its
        // sequences records it so the program can be found again later.
        let unit = line_units.len();
        line_units.push(LineUnit {
            unit_offset: header.offset(),
            line_program,
        });

        // FIXME: make gimli stop returning nonsense sequences
        line_sequences.extend(sequences.into_iter().filter_map(|sequence| {
            (sequence.start != 0).then(|| LineSequence { sequence, unit })
        }));
    }

    // DWARF orders instruction addresses only within a sequence; separate
    // sequences may appear in any order and may be interleaved across
    // compilation units. Sorting by start address lets lookups use a
    // binary search.
    line_sequences.sort_unstable_by_key(|entry| entry.sequence.start);

    // We expect that no sequence overlaps with any other.
    debug_assert!(line_sequences
        .windows(2)
        .all(|pair| pair[0].sequence.end <= pair[1].sequence.start));

    self.result.debuginfo.line_units = line_units;
    self.result.debuginfo.line_sequences = line_sequences;
    Ok(())
}

fn translate_payload(&mut self, payload: Payload<'data>) -> Result<()> {
match payload {
Payload::Version {
Expand Down

0 comments on commit 0999d43

Please sign in to comment.