diff --git a/.github/workflows/windows_tests.yml b/.github/workflows/windows_tests.yml index 3c930d54b13..82920c2a5f9 100644 --- a/.github/workflows/windows_tests.yml +++ b/.github/workflows/windows_tests.yml @@ -50,7 +50,11 @@ jobs: # Why are these tests not build with previous command? => fingerprint error. Use `CARGO_LOG=cargo::core::compiler::fingerprint=info` to investigate - name: Build specific tests without running. Twice for zig lld-link error. - run: cargo test --locked --release --no-run -p roc_ident -p roc_region -p roc_collections -p roc_can -p roc_types -p roc_solve -p roc_mono -p roc_gen_dev -p roc_gen_wasm -p roc_serialize -p roc_editor -p roc_linker -p roc_cli || cargo test --locked --release --no-run -p roc_ident -p roc_region -p roc_collections -p roc_can -p roc_types -p roc_solve -p roc_mono -p roc_gen_dev -p roc_gen_wasm -p roc_serialize -p roc_editor -p roc_linker -p roc_cli + run: cargo test --locked --release --no-run -p roc_ident -p roc_region -p roc_collections -p roc_can -p roc_types -p roc_solve -p roc_mono -p roc_gen_dev -p roc_gen_wasm -p roc_serialize -p roc_editor -p roc_linker -p roc_cli -p test_gen || cargo test --locked --release --no-run -p roc_ident -p roc_region -p roc_collections -p roc_can -p roc_types -p roc_solve -p roc_mono -p roc_gen_dev -p roc_gen_wasm -p roc_serialize -p roc_editor -p roc_linker -p roc_cli -p test_gen + + - name: Test setjmp/longjmp logic + run: cargo test-gen-dev --locked --release nat_alias && cargo test-gen-dev --locked --release a_crash - name: Actually run the tests. - run: cargo test --locked --release -p roc_ident -p roc_region -p roc_collections -p roc_can -p roc_types -p roc_solve -p roc_mono -p roc_gen_dev -p roc_gen_wasm -p roc_serialize -p roc_editor -p roc_linker -p roc_cli \ No newline at end of file + run: cargo test --locked --release -p roc_ident -p roc_region -p roc_collections -p roc_can -p roc_types -p roc_solve -p roc_mono -p roc_gen_dev -p roc_gen_wasm -p roc_serialize -p roc_editor -p roc_linker -p roc_cli + diff --git a/crates/compiler/gen_dev/src/generic64/aarch64.rs b/crates/compiler/gen_dev/src/generic64/aarch64.rs index fb777b1f4be..f16044aaf55 100644 --- a/crates/compiler/gen_dev/src/generic64/aarch64.rs +++ b/crates/compiler/gen_dev/src/generic64/aarch64.rs @@ -455,6 +455,18 @@ impl CallConv for AArch64C ) { todo!("Loading returned complex symbols for AArch64"); } + + fn setjmp(_buf: &mut Vec<'_, u8>) { + todo!() + } + + fn longjmp(_buf: &mut Vec<'_, u8>) { + todo!() + } + + fn roc_panic(_buf: &mut Vec<'_, u8>, _relocs: &mut Vec<'_, Relocation>) { + todo!() + } } impl Assembler for AArch64Assembler { @@ -529,7 +541,17 @@ impl Assembler for AArch64Assembler { _fn_name: String, _dst: AArch64GeneralReg, ) { - todo!("calling functions literal for AArch64"); + todo!("function pointer for AArch64"); + } + + #[inline(always)] + fn data_pointer( + _buf: &mut Vec<'_, u8>, + _relocs: &mut Vec<'_, Relocation>, + _fn_name: String, + _dst: AArch64GeneralReg, + ) { + todo!("data pointer for AArch64"); } #[inline(always)] diff --git a/crates/compiler/gen_dev/src/generic64/mod.rs b/crates/compiler/gen_dev/src/generic64/mod.rs index 68b27356f36..98a9dfc5ca4 100644 --- a/crates/compiler/gen_dev/src/generic64/mod.rs +++ b/crates/compiler/gen_dev/src/generic64/mod.rs @@ -134,6 +134,10 @@ pub trait CallConv, ); + + fn setjmp(buf: &mut Vec<'_, u8>); + fn longjmp(buf: &mut Vec<'_, u8>); + fn roc_panic(buf: &mut Vec<'_, u8>, relocs: &mut Vec<'_, Relocation>); } pub enum CompareOperation { @@ -238,6 +242,13 @@ pub trait 
Assembler: Sized + Copy { dst: GeneralReg, ); + fn data_pointer( + buf: &mut Vec<'_, u8>, + relocs: &mut Vec<'_, Relocation>, + fn_name: String, + dst: GeneralReg, + ); + /// Jumps by an offset of offset bytes unconditionally. /// It should always generate the same number of bytes to enable replacement if offset changes. /// It returns the base offset to calculate the jump from (generally the instruction after the jump). @@ -714,6 +725,9 @@ impl< fn interner(&self) -> &STLayoutInterner<'a> { self.layout_interner } + fn relocations_mut(&mut self) -> &mut Vec<'a, Relocation> { + &mut self.relocs + } fn module_interns_helpers_mut( &mut self, ) -> ( @@ -887,12 +901,47 @@ impl< (out.into_bump_slice(), offset) } + fn build_roc_setjmp(&mut self) -> &'a [u8] { + let mut out = bumpalo::vec![in self.env.arena]; + + CC::setjmp(&mut out); + + out.into_bump_slice() + } + + fn build_roc_longjmp(&mut self) -> &'a [u8] { + let mut out = bumpalo::vec![in self.env.arena]; + + CC::longjmp(&mut out); + + out.into_bump_slice() + } + + fn build_roc_panic(&mut self) -> (&'a [u8], Vec<'a, Relocation>) { + let mut out = bumpalo::vec![in self.env.arena]; + let mut relocs = bumpalo::vec![in self.env.arena]; + + CC::roc_panic(&mut out, &mut relocs); + + (out.into_bump_slice(), relocs) + } + fn build_fn_pointer(&mut self, dst: &Symbol, fn_name: String) { let reg = self.storage_manager.claim_general_reg(&mut self.buf, dst); ASM::function_pointer(&mut self.buf, &mut self.relocs, fn_name, reg) } + fn build_data_pointer(&mut self, dst: &Symbol, data_name: String) { + let reg = self.storage_manager.claim_general_reg(&mut self.buf, dst); + + // now, this gives a pointer to the value + ASM::data_pointer(&mut self.buf, &mut self.relocs, data_name, reg); + + // dereference + ASM::mov_reg64_mem64_offset32(&mut self.buf, reg, reg, 0); + } + fn build_fn_call( &mut self, dst: &Symbol, @@ -4215,7 +4264,18 @@ impl< Builtin::Int(int_width) => match int_width { IntWidth::I128 | IntWidth::U128 => { // can we treat this as 2 u64's? 
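+                        // (yes: the change below copies it through a scratch register as two 8-byte halves)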
-                        todo!()
+                        storage_manager.with_tmp_general_reg(
+                            buf,
+                            |storage_manager, buf, tmp_reg| {
+                                let base_offset = storage_manager.claim_stack_area(&dst, 16);
+
+                                ASM::mov_reg64_mem64_offset32(buf, tmp_reg, ptr_reg, offset);
+                                ASM::mov_base32_reg64(buf, base_offset, tmp_reg);
+
+                                ASM::mov_reg64_mem64_offset32(buf, tmp_reg, ptr_reg, offset + 8);
+                                ASM::mov_base32_reg64(buf, base_offset + 8, tmp_reg);
+                            },
+                        );
                     }
                     IntWidth::I64 | IntWidth::U64 => {
                         let dst_reg = storage_manager.claim_general_reg(buf, &dst);
@@ -4253,6 +4313,15 @@ impl<
                 }
                 Builtin::Decimal => {
                     // same as 128-bit integer
+                    storage_manager.with_tmp_general_reg(buf, |storage_manager, buf, tmp_reg| {
+                        let base_offset = storage_manager.claim_stack_area(&dst, 16);
+
+                        ASM::mov_reg64_mem64_offset32(buf, tmp_reg, ptr_reg, offset);
+                        ASM::mov_base32_reg64(buf, base_offset, tmp_reg);
+
+                        ASM::mov_reg64_mem64_offset32(buf, tmp_reg, ptr_reg, offset + 8);
+                        ASM::mov_base32_reg64(buf, base_offset + 8, tmp_reg);
+                    });
                 }
                 Builtin::Str | Builtin::List(_) => {
                     storage_manager.with_tmp_general_reg(buf, |storage_manager, buf, tmp_reg| {
diff --git a/crates/compiler/gen_dev/src/generic64/storage.rs b/crates/compiler/gen_dev/src/generic64/storage.rs
index 127e1c4d675..d60338e4c2b 100644
--- a/crates/compiler/gen_dev/src/generic64/storage.rs
+++ b/crates/compiler/gen_dev/src/generic64/storage.rs
@@ -807,7 +807,13 @@ impl<
                         }
                     }
                 }
-                Builtin::Decimal => todo!(),
+                Builtin::Decimal => {
+                    let (from_offset, size) = self.stack_offset_and_size(sym);
+                    debug_assert_eq!(from_offset % 8, 0);
+                    debug_assert_eq!(size % 8, 0);
+                    debug_assert_eq!(size, layout_interner.stack_size(*layout));
+                    self.copy_to_stack_offset(buf, size, from_offset, to_offset)
+                }
                 Builtin::Str | Builtin::List(_) => {
                     let (from_offset, size) = self.stack_offset_and_size(sym);
                     debug_assert_eq!(size, layout_interner.stack_size(*layout));
diff --git a/crates/compiler/gen_dev/src/generic64/x86_64.rs b/crates/compiler/gen_dev/src/generic64/x86_64.rs
index f67f8771943..918330fc58d 100644
--- a/crates/compiler/gen_dev/src/generic64/x86_64.rs
+++ b/crates/compiler/gen_dev/src/generic64/x86_64.rs
@@ -436,6 +436,168 @@ impl CallConv for X86_64Syste
             }
         }
     }
+
+    fn setjmp(buf: &mut Vec<'_, u8>) {
+        use X86_64GeneralReg::*;
+        type ASM = X86_64Assembler;
+
+        // based on the musl libc setjmp implementation
+        //
+        // 000000000020237c <__setjmp>:
+        //   20237c: 48 89 1f       mov    QWORD PTR [rdi],rbx
+        //   20237f: 48 89 6f 08    mov    QWORD PTR [rdi+0x8],rbp
+        //   202383: 4c 89 67 10    mov    QWORD PTR [rdi+0x10],r12
+        //   202387: 4c 89 6f 18    mov    QWORD PTR [rdi+0x18],r13
+        //   20238b: 4c 89 77 20    mov    QWORD PTR [rdi+0x20],r14
+        //   20238f: 4c 89 7f 28    mov    QWORD PTR [rdi+0x28],r15
+        //   202393: 48 8d 54 24 08 lea    rdx,[rsp+0x8]
+        //   202398: 48 89 57 30    mov    QWORD PTR [rdi+0x30],rdx
+        //   20239c: 48 8b 14 24    mov    rdx,QWORD PTR [rsp]
+        //   2023a0: 48 89 57 38    mov    QWORD PTR [rdi+0x38],rdx
+        //   2023a4: 31 c0          xor    eax,eax
+        //   2023a6: c3             ret
+
+        let env = RDI;
+
+        // store callee-saved (i.e. non-volatile) registers
+        ASM::mov_mem64_offset32_reg64(buf, env, 0x00, RBX);
+        ASM::mov_mem64_offset32_reg64(buf, env, 0x08, RBP);
+        ASM::mov_mem64_offset32_reg64(buf, env, 0x10, R12);
+        ASM::mov_mem64_offset32_reg64(buf, env, 0x18, R13);
+        ASM::mov_mem64_offset32_reg64(buf, env, 0x20, R14);
+        ASM::mov_mem64_offset32_reg64(buf, env, 0x28, R15);
+
+        // compute the stack pointer as it was before the call to setjmp (skip the return address)
+        lea_reg64_offset8(buf, RDX, RSP, 0x8);
+
+        // store that pre-call stack pointer
+        ASM::mov_mem64_offset32_reg64(buf, env, 0x30, RDX);
+
+        // store the address we'll resume at
+        ASM::mov_reg64_mem64_offset32(buf, RDX, RSP, 0);
+        ASM::mov_mem64_offset32_reg64(buf, env, 0x38, RDX);
+
+        // zero out eax, so we return 0 (we do a 64-bit xor for convenience)
+        ASM::xor_reg64_reg64_reg64(buf, RAX, RAX, RAX);
+
+        ASM::ret(buf)
+    }
+
+    fn longjmp(buf: &mut Vec<'_, u8>) {
+        use X86_64GeneralReg::*;
+        type ASM = X86_64Assembler;
+
+        //   202358: 31 c0          xor    eax,eax
+        //   20235a: 83 fe 01       cmp    esi,0x1
+        //   20235d: 11 f0          adc    eax,esi
+        //   20235f: 48 8b 1f       mov    rbx,QWORD PTR [rdi]
+        //   202362: 48 8b 6f 08    mov    rbp,QWORD PTR [rdi+0x8]
+        //   202366: 4c 8b 67 10    mov    r12,QWORD PTR [rdi+0x10]
+        //   20236a: 4c 8b 6f 18    mov    r13,QWORD PTR [rdi+0x18]
+        //   20236e: 4c 8b 77 20    mov    r14,QWORD PTR [rdi+0x20]
+        //   202372: 4c 8b 7f 28    mov    r15,QWORD PTR [rdi+0x28]
+        //   202376: 48 8b 67 30    mov    rsp,QWORD PTR [rdi+0x30]
+        //   20237a: ff 67 38       jmp    QWORD PTR [rdi+0x38]
+
+        // pass the longjmp argument through as setjmp's return value
+        // (roc_panic always passes a nonzero pointer here)
+        ASM::mov_reg64_reg64(buf, RAX, RSI);
+
+        // restore the callee-saved registers
+        let env = RDI;
+        ASM::mov_reg64_mem64_offset32(buf, RBX, env, 0x00);
+        ASM::mov_reg64_mem64_offset32(buf, RBP, env, 0x08);
+        ASM::mov_reg64_mem64_offset32(buf, R12, env, 0x10);
+        ASM::mov_reg64_mem64_offset32(buf, R13, env, 0x18);
+        ASM::mov_reg64_mem64_offset32(buf, R14, env, 0x20);
+        ASM::mov_reg64_mem64_offset32(buf, R15, env, 0x28);
+
+        // value of rsp before the setjmp call
+        ASM::mov_reg64_mem64_offset32(buf, RSP, env, 0x30);
+
+        jmp_reg64_offset8(buf, env, 0x38)
+    }
+
+    fn roc_panic(buf: &mut Vec<'_, u8>, relocs: &mut Vec<'_, Relocation>) {
+        use X86_64GeneralReg::*;
+        type ASM = X86_64Assembler;
+
+        // move the first argument to roc_panic (a *RocStr) into r8
+        ASM::add_reg64_reg64_imm32(buf, R8, RSP, 8);
+
+        // move the crash tag into the second return register. We add 1 to it because the 0 value
+        // is already used for "no crash occurred"
+        ASM::add_reg64_reg64_imm32(buf, RDX, RDI, 1);
+
+        // the setlongjmp_buffer
+        ASM::data_pointer(buf, relocs, String::from("setlongjmp_buffer"), RDI);
+        ASM::mov_reg64_mem64_offset32(buf, RDI, RDI, 0);
+
+        // the value to return from the longjmp: a pointer to the 3 words of the
+        // setlongjmp_buffer that hold the error message
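+        // (0x40 = 8 saved words * 8 bytes: the message area starts right after the register save area)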
+        ASM::mov_reg64_imm64(buf, RSI, 0x40);
+        ASM::add_reg64_reg64_reg64(buf, RSI, RSI, RDI);
+
+        for offset in [0, 8, 16] {
+            ASM::mov_reg64_mem64_offset32(buf, R9, R8, offset);
+            ASM::mov_mem64_offset32_reg64(buf, RSI, offset, R9);
+        }
+
+        Self::longjmp(buf)
+    }
+}
+
+fn copy_symbol_to_stack<'a, CC>(
+    buf: &mut Vec<'a, u8>,
+    storage_manager: &mut X86_64StorageManager<'a, '_, CC>,
+    sym: Symbol,
+    tmp_reg: X86_64GeneralReg,
+    stack_offset: i32,
+) -> u32
+where
+    CC: CallConv<X86_64GeneralReg, X86_64FloatReg, X86_64Assembler>,
+{
+    type ASM = X86_64Assembler;
+
+    let mut copied = 0;
+    let (base_offset, size) = storage_manager.stack_offset_and_size(&sym);
+
+    // copy in 8-, 4-, 2- and 1-byte chunks; the `while` conditions guarantee
+    // that we never copy past `size` bytes
+    while size - copied >= 8 {
+        ASM::mov_reg64_base32(buf, tmp_reg, base_offset + copied as i32);
+        ASM::mov_stack32_reg64(buf, stack_offset + copied as i32, tmp_reg);
+
+        copied += 8;
+    }
+
+    while size - copied >= 4 {
+        ASM::mov_reg32_base32(buf, tmp_reg, base_offset + copied as i32);
+        ASM::mov_stack32_reg32(buf, stack_offset + copied as i32, tmp_reg);
+
+        copied += 4;
+    }
+
+    while size - copied >= 2 {
+        ASM::mov_reg16_base32(buf, tmp_reg, base_offset + copied as i32);
+        ASM::mov_stack32_reg16(buf, stack_offset + copied as i32, tmp_reg);
+
+        copied += 2;
+    }
+
+    while size - copied >= 1 {
+        ASM::mov_reg8_base32(buf, tmp_reg, base_offset + copied as i32);
+        ASM::mov_stack32_reg8(buf, stack_offset + copied as i32, tmp_reg);
+
+        copied += 1;
+    }
+
+    size
}

struct X64_64SystemVStoreArgs {
@@ -459,6 +621,9 @@ impl X64_64SystemVStoreArgs {
        sym: Symbol,
        in_layout: InLayout<'a>,
    ) {
+        // we use the return register as a temporary register; it will be overwritten anyway
+        let tmp_reg = Self::GENERAL_RETURN_REGS[0];
+
        match layout_interner.get_repr(in_layout) {
            single_register_integers!() => self.store_arg_general(buf, storage_manager, sym),
            pointer_layouts!() => self.store_arg_general(buf, storage_manager, sym),
@@ -491,21 +656,10 @@
            _ if layout_interner.stack_size(in_layout) > 16 => {
                // TODO: Double check this.
                // Just copy onto the stack.
-                // Use return reg as buffer because it will be empty right now.
-                let (base_offset, size) = storage_manager.stack_offset_and_size(&sym);
-                debug_assert_eq!(base_offset % 8, 0);
-                for i in (0..size as i32).step_by(8) {
-                    X86_64Assembler::mov_reg64_base32(
-                        buf,
-                        Self::GENERAL_RETURN_REGS[0],
-                        base_offset + i,
-                    );
-                    X86_64Assembler::mov_stack32_reg64(
-                        buf,
-                        self.tmp_stack_offset + i,
-                        Self::GENERAL_RETURN_REGS[0],
-                    );
-                }
+                let stack_offset = self.tmp_stack_offset;
+
+                let size = copy_symbol_to_stack(buf, storage_manager, sym, tmp_reg, stack_offset);
+
                self.tmp_stack_offset += size as i32;
            }
            LayoutRepr::LambdaSet(lambda_set) => self.store_arg(
                buf,
                storage_manager,
                layout_interner,
                sym,
                lambda_set.runtime_representation(),
            ),
            LayoutRepr::Struct { ..
} => { - // for now, just also store this on the stack - let (base_offset, size) = storage_manager.stack_offset_and_size(&sym); - debug_assert_eq!(base_offset % 8, 0); - for i in (0..size as i32).step_by(8) { - X86_64Assembler::mov_reg64_base32( - buf, - Self::GENERAL_RETURN_REGS[0], - base_offset + i, - ); - X86_64Assembler::mov_stack32_reg64( - buf, - self.tmp_stack_offset + i, - Self::GENERAL_RETURN_REGS[0], - ); - } + let stack_offset = self.tmp_stack_offset; + + let size = copy_symbol_to_stack(buf, storage_manager, sym, tmp_reg, stack_offset); + self.tmp_stack_offset += size as i32; } LayoutRepr::Union(UnionLayout::NonRecursive(_)) => { - type ASM = X86_64Assembler; - - let tmp_reg = Self::GENERAL_RETURN_REGS[0]; let stack_offset = self.tmp_stack_offset; - let mut copied = 0; - let (base_offset, size) = storage_manager.stack_offset_and_size(&sym); + let size = copy_symbol_to_stack(buf, storage_manager, sym, tmp_reg, stack_offset); - if size - copied >= 8 { - for _ in (0..(size - copied)).step_by(8) { - ASM::mov_reg64_base32(buf, tmp_reg, base_offset + copied as i32); - ASM::mov_stack32_reg64(buf, stack_offset + copied as i32, tmp_reg); + self.tmp_stack_offset += size as i32; + } + _ => { + todo!( + "calling with arg type, {:?}", + layout_interner.dbg(in_layout) + ); + } + } + } - copied += 8; - } - } + fn store_arg_general<'a>( + &mut self, + buf: &mut Vec<'a, u8>, + storage_manager: &mut X86_64StorageManager<'a, '_, X86_64SystemV>, + sym: Symbol, + ) { + match Self::GENERAL_PARAM_REGS.get(self.general_i) { + Some(reg) => { + storage_manager.load_to_specified_general_reg(buf, &sym, *reg); + self.general_i += 1; + } + None => { + // Copy to stack using return reg as buffer. + let tmp = Self::GENERAL_RETURN_REGS[0]; - if size - copied >= 4 { - for _ in (0..(size - copied)).step_by(4) { - ASM::mov_reg32_base32(buf, tmp_reg, base_offset + copied as i32); - ASM::mov_stack32_reg32(buf, stack_offset + copied as i32, tmp_reg); + storage_manager.load_to_specified_general_reg(buf, &sym, tmp); + X86_64Assembler::mov_stack32_reg64(buf, self.tmp_stack_offset, tmp); - copied += 4; - } - } + self.tmp_stack_offset += 8; + } + } + } - if size - copied >= 2 { - for _ in (0..(size - copied)).step_by(2) { - ASM::mov_reg16_base32(buf, tmp_reg, base_offset + copied as i32); - ASM::mov_stack32_reg16(buf, stack_offset + copied as i32, tmp_reg); + fn store_arg_float<'a>( + &mut self, + buf: &mut Vec<'a, u8>, + storage_manager: &mut X86_64StorageManager<'a, '_, X86_64SystemV>, + sym: Symbol, + ) { + match Self::FLOAT_PARAM_REGS.get(self.float_i) { + Some(reg) => { + storage_manager.load_to_specified_float_reg(buf, &sym, *reg); + self.float_i += 1; + } + None => { + // Copy to stack using return reg as buffer. 
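+                    // (xmm0 under SysV; the return register cannot hold a live value while arguments are being set up)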
+ let tmp = Self::FLOAT_RETURN_REGS[0]; - copied += 2; - } - } + storage_manager.load_to_specified_float_reg(buf, &sym, tmp); + X86_64Assembler::mov_stack32_freg64(buf, self.tmp_stack_offset, tmp); - if size - copied >= 1 { - for _ in (0..(size - copied)).step_by(1) { - ASM::mov_reg8_base32(buf, tmp_reg, base_offset + copied as i32); - ASM::mov_stack32_reg8(buf, stack_offset + copied as i32, tmp_reg); + self.tmp_stack_offset += 8; + } + } + } +} - copied += 1; - } +struct X64_64WindowsFastCallStoreArgs { + general_i: usize, + float_i: usize, + tmp_stack_offset: i32, +} + +impl X64_64WindowsFastCallStoreArgs { + const GENERAL_PARAM_REGS: &'static [X86_64GeneralReg] = + X86_64WindowsFastcall::GENERAL_PARAM_REGS; + const GENERAL_RETURN_REGS: &'static [X86_64GeneralReg] = + X86_64WindowsFastcall::GENERAL_RETURN_REGS; + + const FLOAT_PARAM_REGS: &'static [X86_64FloatReg] = X86_64WindowsFastcall::FLOAT_PARAM_REGS; + const FLOAT_RETURN_REGS: &'static [X86_64FloatReg] = X86_64WindowsFastcall::FLOAT_RETURN_REGS; + + fn store_arg<'a>( + &mut self, + buf: &mut Vec<'a, u8>, + storage_manager: &mut X86_64StorageManager<'a, '_, X86_64WindowsFastcall>, + layout_interner: &mut STLayoutInterner<'a>, + sym: Symbol, + in_layout: InLayout<'a>, + ) { + // we use the return register as a temporary register; it will be overwritten anyway + let tmp_reg = Self::GENERAL_RETURN_REGS[0]; + + match layout_interner.get_repr(in_layout) { + single_register_integers!() => self.store_arg_general(buf, storage_manager, sym), + pointer_layouts!() => self.store_arg_general(buf, storage_manager, sym), + single_register_floats!() => self.store_arg_float(buf, storage_manager, sym), + LayoutRepr::I128 | LayoutRepr::U128 => { + let (offset, _) = storage_manager.stack_offset_and_size(&sym); + + if self.general_i + 1 < Self::GENERAL_PARAM_REGS.len() { + let reg1 = Self::GENERAL_PARAM_REGS[self.general_i]; + let reg2 = Self::GENERAL_PARAM_REGS[self.general_i + 1]; + + X86_64Assembler::mov_reg64_base32(buf, reg1, offset); + X86_64Assembler::mov_reg64_base32(buf, reg2, offset + 8); + + self.general_i += 2; + } else { + // Copy to stack using return reg as buffer. + let reg = Self::GENERAL_RETURN_REGS[0]; + + X86_64Assembler::mov_reg64_base32(buf, reg, offset); + X86_64Assembler::mov_stack32_reg64(buf, self.tmp_stack_offset, reg); + + X86_64Assembler::mov_reg64_base32(buf, reg, offset + 8); + X86_64Assembler::mov_stack32_reg64(buf, self.tmp_stack_offset + 8, reg); + + self.tmp_stack_offset += 16; } + } + _ if layout_interner.stack_size(in_layout) == 0 => {} + _ if layout_interner.stack_size(in_layout) > 16 => { + // for now, just copy onto the stack. + let stack_offset = self.tmp_stack_offset; + + let size = copy_symbol_to_stack(buf, storage_manager, sym, tmp_reg, stack_offset); + + self.tmp_stack_offset += size as i32; + } + LayoutRepr::LambdaSet(lambda_set) => self.store_arg( + buf, + storage_manager, + layout_interner, + sym, + lambda_set.runtime_representation(), + ), + LayoutRepr::Struct { .. 
} => { + // for now, just also store this on the stack + let stack_offset = self.tmp_stack_offset; + + let size = copy_symbol_to_stack(buf, storage_manager, sym, tmp_reg, stack_offset); + + self.tmp_stack_offset += size as i32; + } + LayoutRepr::Union(UnionLayout::NonRecursive(_)) => { + let stack_offset = self.tmp_stack_offset; + + let size = copy_symbol_to_stack(buf, storage_manager, sym, tmp_reg, stack_offset); self.tmp_stack_offset += size as i32; } @@ -592,7 +836,7 @@ impl X64_64SystemVStoreArgs { fn store_arg_general<'a>( &mut self, buf: &mut Vec<'a, u8>, - storage_manager: &mut X86_64StorageManager<'a, '_, X86_64SystemV>, + storage_manager: &mut X86_64StorageManager<'a, '_, X86_64WindowsFastcall>, sym: Symbol, ) { match Self::GENERAL_PARAM_REGS.get(self.general_i) { @@ -615,7 +859,7 @@ impl X64_64SystemVStoreArgs { fn store_arg_float<'a>( &mut self, buf: &mut Vec<'a, u8>, - storage_manager: &mut X86_64StorageManager<'a, '_, X86_64SystemV>, + storage_manager: &mut X86_64StorageManager<'a, '_, X86_64WindowsFastcall>, sym: Symbol, ) { match Self::FLOAT_PARAM_REGS.get(self.float_i) { @@ -636,15 +880,15 @@ impl X64_64SystemVStoreArgs { } } +type X86_64StorageManager<'a, 'r, CallConv> = + StorageManager<'a, 'r, X86_64GeneralReg, X86_64FloatReg, X86_64Assembler, CallConv>; + struct X64_64SystemVLoadArgs { general_i: usize, float_i: usize, argument_offset: i32, } -type X86_64StorageManager<'a, 'r, CallConv> = - StorageManager<'a, 'r, X86_64GeneralReg, X86_64FloatReg, X86_64Assembler, CallConv>; - impl X64_64SystemVLoadArgs { fn load_arg<'a>( &mut self, @@ -700,9 +944,8 @@ impl X64_64SystemVLoadArgs { storage_manager: &mut X86_64StorageManager<'_, '_, X86_64SystemV>, sym: Symbol, ) { - if self.general_i < X86_64SystemV::GENERAL_PARAM_REGS.len() { - let reg = X86_64SystemV::GENERAL_PARAM_REGS[self.general_i]; - storage_manager.general_reg_arg(&sym, reg); + if let Some(reg) = X86_64SystemV::GENERAL_PARAM_REGS.get(self.general_i) { + storage_manager.general_reg_arg(&sym, *reg); self.general_i += 1; } else { storage_manager.primitive_stack_arg(&sym, self.argument_offset); @@ -715,9 +958,93 @@ impl X64_64SystemVLoadArgs { storage_manager: &mut X86_64StorageManager<'_, '_, X86_64SystemV>, sym: Symbol, ) { - if self.float_i < X86_64SystemV::FLOAT_PARAM_REGS.len() { - let reg = X86_64SystemV::FLOAT_PARAM_REGS[self.float_i]; - storage_manager.float_reg_arg(&sym, reg); + if let Some(reg) = X86_64SystemV::FLOAT_PARAM_REGS.get(self.float_i) { + storage_manager.float_reg_arg(&sym, *reg); + self.float_i += 1; + } else { + storage_manager.primitive_stack_arg(&sym, self.argument_offset); + self.argument_offset += 8; + } + } +} + +struct X64_64WindowsFastCallLoadArgs { + general_i: usize, + float_i: usize, + argument_offset: i32, +} + +impl X64_64WindowsFastCallLoadArgs { + fn load_arg<'a>( + &mut self, + storage_manager: &mut X86_64StorageManager<'a, '_, X86_64WindowsFastcall>, + layout_interner: &mut STLayoutInterner<'a>, + sym: Symbol, + in_layout: InLayout<'a>, + ) { + let stack_size = layout_interner.stack_size(in_layout); + match layout_interner.get_repr(in_layout) { + single_register_integers!() => self.load_arg_general(storage_manager, sym), + pointer_layouts!() => self.load_arg_general(storage_manager, sym), + single_register_floats!() => self.load_arg_float(storage_manager, sym), + _ if stack_size == 0 => { + storage_manager.no_data(&sym); + } + _ if stack_size > 16 => { + // TODO: Double check this. 
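+                // (the Windows x64 C ABI would pass aggregates larger than 8 bytes by reference;
+                // passing them by value on the stack here assumes Roc's internal calling convention)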
+ storage_manager.complex_stack_arg(&sym, self.argument_offset, stack_size); + self.argument_offset += stack_size as i32; + } + LayoutRepr::LambdaSet(lambda_set) => self.load_arg( + storage_manager, + layout_interner, + sym, + lambda_set.runtime_representation(), + ), + LayoutRepr::Struct { .. } => { + // for now, just also store this on the stack + storage_manager.complex_stack_arg(&sym, self.argument_offset, stack_size); + self.argument_offset += stack_size as i32; + } + LayoutRepr::Builtin(Builtin::Int(IntWidth::U128 | IntWidth::I128)) => { + storage_manager.complex_stack_arg(&sym, self.argument_offset, stack_size); + self.argument_offset += stack_size as i32; + } + LayoutRepr::Union(UnionLayout::NonRecursive(_)) => { + // for now, just also store this on the stack + storage_manager.complex_stack_arg(&sym, self.argument_offset, stack_size); + self.argument_offset += stack_size as i32; + } + _ => { + todo!( + "Loading args with layout {:?}", + layout_interner.dbg(in_layout) + ); + } + } + } + + fn load_arg_general( + &mut self, + storage_manager: &mut X86_64StorageManager<'_, '_, X86_64WindowsFastcall>, + sym: Symbol, + ) { + if let Some(reg) = X86_64WindowsFastcall::GENERAL_PARAM_REGS.get(self.general_i) { + storage_manager.general_reg_arg(&sym, *reg); + self.general_i += 1; + } else { + storage_manager.primitive_stack_arg(&sym, self.argument_offset); + self.argument_offset += 8; + } + } + + fn load_arg_float( + &mut self, + storage_manager: &mut X86_64StorageManager<'_, '_, X86_64WindowsFastcall>, + sym: Symbol, + ) { + if let Some(reg) = X86_64WindowsFastcall::FLOAT_PARAM_REGS.get(self.float_i) { + storage_manager.float_reg_arg(&sym, *reg); self.float_i += 1; } else { storage_manager.primitive_stack_arg(&sym, self.argument_offset); @@ -874,47 +1201,22 @@ impl CallConv for X86_64Windo args: &'a [(InLayout<'a>, Symbol)], ret_layout: &InLayout<'a>, ) { - let mut arg_offset = Self::SHADOW_SPACE_SIZE as i32 + 16; // 16 is the size of the pushed return address and base pointer. + let returns_via_pointer = + X86_64WindowsFastcall::returns_via_arg_pointer(layout_interner, ret_layout); - let mut general_registers_used = 0; - let mut float_registers_used = 0; + let mut state = X64_64WindowsFastCallLoadArgs { + general_i: usize::from(returns_via_pointer), + float_i: 0, + // 16 is the size of the pushed return address and base pointer. 
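+            // stack arguments start above the 32-byte shadow space that the caller must reserve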
+            argument_offset: X86_64WindowsFastcall::SHADOW_SPACE_SIZE as i32 + 16,
+        };

-        if X86_64WindowsFastcall::returns_via_arg_pointer(layout_interner, ret_layout) {
-            storage_manager.ret_pointer_arg(Self::GENERAL_PARAM_REGS[0]);
-            general_registers_used += 1;
+        if returns_via_pointer {
+            storage_manager.ret_pointer_arg(X86_64WindowsFastcall::GENERAL_PARAM_REGS[0]);
        }

-        for (layout, sym) in args.iter() {
-            match layout_interner.get_repr(*layout) {
-                single_register_integers!() => {
-                    match Self::GENERAL_PARAM_REGS.get(general_registers_used) {
-                        Some(reg) => {
-                            storage_manager.general_reg_arg(sym, *reg);
-                            general_registers_used += 1;
-                        }
-                        None => {
-                            storage_manager.primitive_stack_arg(sym, arg_offset);
-                            arg_offset += 8;
-                        }
-                    }
-                }
-                single_register_floats!() => {
-                    match Self::FLOAT_PARAM_REGS.get(float_registers_used) {
-                        Some(reg) => {
-                            storage_manager.float_reg_arg(sym, *reg);
-                            float_registers_used += 1;
-                        }
-                        None => {
-                            storage_manager.primitive_stack_arg(sym, arg_offset);
-                            arg_offset += 8;
-                        }
-                    }
-                }
-                _ if layout_interner.stack_size(*layout) == 0 => {}
-                x => {
-                    todo!("Loading args with layout {:?}", x);
-                }
-            }
+        for (in_layout, sym) in args.iter() {
+            state.load_arg(storage_manager, layout_interner, *sym, *in_layout);
        }
    }
@@ -935,64 +1237,40 @@ impl CallConv for X86_64Windo
        arg_layouts: &[InLayout<'a>],
        ret_layout: &InLayout<'a>,
    ) {
-        let mut tmp_stack_offset = Self::SHADOW_SPACE_SIZE as i32;
+        let mut general_i = 0;
+
        if Self::returns_via_arg_pointer(layout_interner, ret_layout) {
-            // Save space on the stack for the arg we will return.
-            storage_manager.claim_stack_area(dst, layout_interner.stack_size(*ret_layout));
-            todo!("claim first parama reg for the address");
+            // Save space on the stack for the result we will return.
+            let base_offset =
+                storage_manager.claim_stack_area(dst, layout_interner.stack_size(*ret_layout));
+
+            // Set the first reg to the address base + offset.
+            let ret_reg = Self::GENERAL_PARAM_REGS[general_i];
+            general_i += 1;
+            X86_64Assembler::add_reg64_reg64_imm32(
+                buf,
+                ret_reg,
+                X86_64GeneralReg::RBP,
+                base_offset,
+            );
        }
-        let mut general_registers_used = 0;
-        let mut float_registers_used = 0;
-
-        for (sym, layout) in args.iter().zip(arg_layouts.iter()) {
-            match layout_interner.get_repr(*layout) {
-                single_register_integers!() => {
-                    match Self::GENERAL_PARAM_REGS.get(general_registers_used) {
-                        Some(reg) => {
-                            storage_manager.load_to_specified_general_reg(buf, sym, *reg);
-                            general_registers_used += 1;
-                        }
-                        None => {
-                            // Copy to stack using return reg as buffer.
-                            let tmp = Self::GENERAL_RETURN_REGS[0];
-
-                            storage_manager.load_to_specified_general_reg(buf, sym, tmp);
-                            X86_64Assembler::mov_stack32_reg64(buf, tmp_stack_offset, tmp);
-
-                            tmp_stack_offset += 8;
-                        }
-                    }
-                }
-                single_register_floats!() => {
-                    match Self::FLOAT_PARAM_REGS.get(float_registers_used) {
-                        Some(reg) => {
-                            storage_manager.load_to_specified_float_reg(buf, sym, *reg);
-                            float_registers_used += 1;
-                        }
-                        None => {
-                            // Copy to stack using return reg as buffer.
-                            let tmp = Self::FLOAT_RETURN_REGS[0];
-
-                            storage_manager.load_to_specified_float_reg(buf, sym, tmp);
-                            X86_64Assembler::mov_stack32_freg64(buf, tmp_stack_offset, tmp);
-
-                            tmp_stack_offset += 8;
-                        }
-                    }
-                }
-                _ if layout_interner.stack_size(*layout) == 0 => {}
-                x => {
-                    todo!("calling with arg type, {:?}", x);
-                }
-            }
+        let mut state = X64_64WindowsFastCallStoreArgs {
+            general_i,
+            float_i: 0,
+            tmp_stack_offset: Self::SHADOW_SPACE_SIZE as i32,
+        };
+
+        for (sym, in_layout) in args.iter().zip(arg_layouts.iter()) {
+            state.store_arg(buf, storage_manager, layout_interner, *sym, *in_layout);
        }
-        storage_manager.update_fn_call_stack_size(tmp_stack_offset as u32);
+
+        storage_manager.update_fn_call_stack_size(state.tmp_stack_offset as u32);
    }

    fn return_complex_symbol<'a>(
-        _buf: &mut Vec<'a, u8>,
-        _storage_manager: &mut StorageManager<
+        buf: &mut Vec<'a, u8>,
+        storage_manager: &mut StorageManager<
            'a,
            '_,
            X86_64GeneralReg,
@@ -1000,16 +1278,46 @@ impl CallConv for X86_64Windo
            X86_64Assembler,
            X86_64WindowsFastcall,
        >,
-        _layout_interner: &mut STLayoutInterner<'a>,
-        _sym: &Symbol,
-        _layout: &InLayout<'a>,
+        layout_interner: &mut STLayoutInterner<'a>,
+        sym: &Symbol,
+        layout: &InLayout<'a>,
    ) {
-        todo!("Returning complex symbols for X86_64");
+        match layout_interner.get_repr(*layout) {
+            single_register_layouts!() => {
+                internal_error!("single register layouts are not complex symbols");
+            }
+            _ if layout_interner.stack_size(*layout) == 0 => {}
+            _ if !Self::returns_via_arg_pointer(layout_interner, layout) => {
+                let (base_offset, size) = storage_manager.stack_offset_and_size(sym);
+                debug_assert_eq!(base_offset % 8, 0);
+                if size <= 8 {
+                    X86_64Assembler::mov_reg64_base32(
+                        buf,
+                        Self::GENERAL_RETURN_REGS[0],
+                        base_offset,
+                    );
+                } else {
+                    internal_error!(
+                        "types that don't return via arg pointer must fit in 8 bytes"
+                    );
+                }
+            }
+            _ => {
+                // This is a large type returned via the arg pointer.
+                storage_manager.copy_symbol_to_arg_pointer(buf, sym, layout);
+                // Also set the return reg to the arg pointer.
+                storage_manager.load_to_specified_general_reg(
+                    buf,
+                    &Symbol::RET_POINTER,
+                    Self::GENERAL_RETURN_REGS[0],
+                );
+            }
+        }
    }

    fn load_returned_complex_symbol<'a>(
-        _buf: &mut Vec<'a, u8>,
-        _storage_manager: &mut StorageManager<
+        buf: &mut Vec<'a, u8>,
+        storage_manager: &mut StorageManager<
            'a,
            '_,
            X86_64GeneralReg,
@@ -1017,11 +1325,179 @@ impl CallConv for X86_64Windo
            X86_64Assembler,
            X86_64WindowsFastcall,
        >,
-        _layout_interner: &mut STLayoutInterner<'a>,
-        _sym: &Symbol,
-        _layout: &InLayout<'a>,
+        layout_interner: &mut STLayoutInterner<'a>,
+        sym: &Symbol,
+        layout: &InLayout<'a>,
    ) {
-        todo!("Loading returned complex symbols for X86_64");
+        match layout_interner.get_repr(*layout) {
+            single_register_layouts!() => {
+                internal_error!("single register layouts are not complex symbols");
+            }
+            _ if layout_interner.stack_size(*layout) == 0 => {
+                storage_manager.no_data(sym);
+            }
+            _ if !Self::returns_via_arg_pointer(layout_interner, layout) => {
+                let size = layout_interner.stack_size(*layout);
+                let offset = storage_manager.claim_stack_area(sym, size);
+                if size <= 8 {
+                    X86_64Assembler::mov_base32_reg64(buf, offset, Self::GENERAL_RETURN_REGS[0]);
+                } else {
+                    internal_error!(
+                        "types that don't return via arg pointer must fit in 8 bytes"
+                    );
+                }
+            }
+            _ => {
+                // This should have been received via an arg pointer.
+                // That means the value is already loaded onto the stack area we allocated before the call.
+                // Nothing to do.
+            }
+        }
+    }
+
+    fn setjmp(buf: &mut Vec<'_, u8>) {
+        use X86_64GeneralReg::*;
+        type ASM = X86_64Assembler;
+
+        // input:
+        //
+        // rcx: pointer to the (two-word) result; this function returns a struct via pointer
+        // rdx: pointer to the jmp_buf
+
+        // mingw_getsp:
+        //    lea rax [ rsp + 8 ]
+        //    ret
+        //
+        // _setjmp:
+        //    mov [rcx + 0x00] rdx
+        //    mov [rcx + 0x08] rbx
+        //    mov [rcx + 0x18] rbp  # note 0x10 is not used yet!
+        //    mov [rcx + 0x20] rsi
+        //    mov [rcx + 0x28] rdi
+        //    mov [rcx + 0x30] r12
+        //    mov [rcx + 0x38] r13
+        //    mov [rcx + 0x40] r14
+        //    mov [rcx + 0x48] r15
+        //    lea r8 [rsp + 0x08]
+        //    mov [rcx + 0x10] r8
+        //    mov r8 [rsp]
+        //    mov [rcx + 0x50] r8
+        //
+        //    stmxcsr [rcx + 0x58]
+        //    fnstcw word ptr [rcx + 0x5C]
+        //
+        //    movdqu xmmword ptr [rcx + 0x60], xmm6
+        //    movdqu xmmword ptr [rcx + 0x70], xmm7
+        //    movdqu xmmword ptr [rcx + 0x80], xmm8
+        //    movdqu xmmword ptr [rcx + 0x90], xmm9
+        //    movdqu xmmword ptr [rcx + 0xa0], xmm10
+        //    movdqu xmmword ptr [rcx + 0xb0], xmm11
+        //    movdqu xmmword ptr [rcx + 0xc0], xmm12
+        //    movdqu xmmword ptr [rcx + 0xd0], xmm13
+        //    movdqu xmmword ptr [rcx + 0xe0], xmm14
+        //    movdqu xmmword ptr [rcx + 0xf0], xmm15
+        //
+        //    xor eax, eax
+        //    ret
+
+        let result_pointer = RCX;
+        let env = RDX;
+        debug_assert_eq!(env, Self::GENERAL_PARAM_REGS[1]);
+
+        ASM::mov_mem64_offset32_reg64(buf, env, 0x00, RDX);
+        ASM::mov_mem64_offset32_reg64(buf, env, 0x08, RBX);
+        // NOTE: 0x10 is unused here!
+        ASM::mov_mem64_offset32_reg64(buf, env, 0x18, RBP);
+        ASM::mov_mem64_offset32_reg64(buf, env, 0x20, RSI);
+        ASM::mov_mem64_offset32_reg64(buf, env, 0x28, RDI);
+        ASM::mov_mem64_offset32_reg64(buf, env, 0x30, R12);
+        ASM::mov_mem64_offset32_reg64(buf, env, 0x38, R13);
+        ASM::mov_mem64_offset32_reg64(buf, env, 0x40, R14);
+        ASM::mov_mem64_offset32_reg64(buf, env, 0x48, R15);
+
+        // compute the stack pointer as it was before the call to setjmp
+        lea_reg64_offset8(buf, R8, RSP, 0x8);
+        ASM::mov_mem64_offset32_reg64(buf, env, 0x10, R8);
+
+        // store the address we'll resume at
+        ASM::mov_reg64_mem64_offset32(buf, R8, RSP, 0);
+        ASM::mov_mem64_offset32_reg64(buf, env, 0x50, R8);
+
+        // zero out the fields of the result pointer
+        ASM::mov_reg64_imm64(buf, R8, 0x00);
+        ASM::mov_mem64_offset32_reg64(buf, result_pointer, 0x00, R8);
+        ASM::mov_mem64_offset32_reg64(buf, result_pointer, 0x08, R8);
+
+        // now the windows implementation goes on to store xmm registers and sse2 stuff.
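+        // (xmm6-xmm15 and the MXCSR/x87 control words are nonvolatile in the Windows x64 ABI)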
+ // we skip that for now + + // store the result pointer into the env so that longjmp can retrieve it + ASM::mov_mem64_offset32_reg64(buf, env, 0x58, result_pointer); + + ASM::ret(buf) + } + + fn longjmp(_buf: &mut Vec<'_, u8>) { + // do nothing, longjmp is part of roc_panic + } + + fn roc_panic(buf: &mut Vec<'_, u8>, relocs: &mut Vec<'_, Relocation>) { + use X86_64GeneralReg::*; + type ASM = X86_64Assembler; + + // a *const RocStr + let roc_str_ptr = R11; + ASM::add_reg64_reg64_imm32(buf, roc_str_ptr, RSP, 16 + 24); // 24 is width of a rocstr + + // a 32-bit integer + let panic_tag = RCX; + debug_assert_eq!(panic_tag, Self::GENERAL_PARAM_REGS[0]); + + // the setlongjmp_buffer + let env = R8; + ASM::data_pointer(buf, relocs, String::from("setlongjmp_buffer"), env); + ASM::mov_reg64_mem64_offset32(buf, env, env, 0); + + // move the roc_str bytes into the setlongjmp_buffer + for offset in [0, 8, 16] { + ASM::mov_reg64_mem64_offset32(buf, R9, roc_str_ptr, offset); + ASM::mov_mem64_offset32_reg64(buf, env, 0x60 + offset, R9); + } + + // now, time to move all the registers back to how they were + ASM::mov_reg64_mem64_offset32(buf, RDX, env, 0x00); + ASM::mov_reg64_mem64_offset32(buf, RBX, env, 0x08); + // again 0x10 is skipped here + ASM::mov_reg64_mem64_offset32(buf, RBP, env, 0x18); + ASM::mov_reg64_mem64_offset32(buf, RSI, env, 0x20); + ASM::mov_reg64_mem64_offset32(buf, RDI, env, 0x28); + ASM::mov_reg64_mem64_offset32(buf, R12, env, 0x30); + ASM::mov_reg64_mem64_offset32(buf, R13, env, 0x38); + ASM::mov_reg64_mem64_offset32(buf, R14, env, 0x40); + ASM::mov_reg64_mem64_offset32(buf, R15, env, 0x48); + + // value of rsp before setjmp call + ASM::mov_reg64_mem64_offset32(buf, RSP, env, 0x10); + + // set up the return values. The windows fastcall calling convention has only one return + // register, and we need to return two values, so we use some space in the setlongjmp_buffer + let result_pointer = R9; + ASM::mov_reg64_mem64_offset32(buf, result_pointer, env, 0x58); + + // a pointer to the error message + ASM::mov_reg64_imm64(buf, R10, 0x60); + ASM::add_reg64_reg64_reg64(buf, R10, R10, env); + + // write a pointer to the error message into result_pointer + ASM::mov_mem64_offset32_reg64(buf, result_pointer, 0x00, R10); + + // the panic_tag; 1 is added to differentiate from 0 (which indicates success) + ASM::add_reg64_reg64_imm32(buf, R10, panic_tag, 1); + + // write the panic tag into the result_pointer + ASM::mov_mem64_offset32_reg64(buf, result_pointer, 0x08, R10); + + jmp_reg64_offset8(buf, env, 0x50) } } @@ -1239,6 +1715,21 @@ impl Assembler for X86_64Assembler { }); } + #[inline(always)] + fn data_pointer( + buf: &mut Vec<'_, u8>, + relocs: &mut Vec<'_, Relocation>, + fn_name: String, + dst: X86_64GeneralReg, + ) { + lea_reg64(buf, dst); + + relocs.push(Relocation::LinkedData { + offset: buf.len() as u64 - 4, + name: fn_name, + }); + } + #[inline(always)] fn imul_reg64_reg64_reg64( buf: &mut Vec<'_, u8>, @@ -2610,6 +3101,21 @@ fn jmp_imm32(buf: &mut Vec<'_, u8>, imm: i32) { buf.extend(imm.to_le_bytes()); } +#[inline(always)] +fn jmp_reg64_offset8(buf: &mut Vec<'_, u8>, base: X86_64GeneralReg, offset: i8) { + let rex = add_rm_extension(base, REX_W); + + #[allow(clippy::unusual_byte_groupings)] + buf.extend([rex, 0xff, 0b01_100_000 | (base as u8 % 8)]); + + // Using RSP or R12 requires a secondary index byte. + if base == X86_64GeneralReg::RSP || base == X86_64GeneralReg::R12 { + buf.push(0x24); + } + + buf.extend(offset.to_le_bytes()) +} + /// Jump near if not equal (ZF=0). 
 #[inline(always)]
 fn jne_imm32(buf: &mut Vec<'_, u8>, imm: i32) {
@@ -2662,6 +3168,32 @@ fn lea_reg64(buf: &mut Vec<'_, u8>, dst: X86_64GeneralReg) {
     ])
 }

+/// `LEA r64, m` -> Store effective address for m in register r64.
+#[inline(always)]
+fn lea_reg64_offset8(
+    buf: &mut Vec<'_, u8>,
+    dst: X86_64GeneralReg,
+    src: X86_64GeneralReg,
+    offset: i8,
+) {
+    let rex = add_rm_extension(src, REX_W);
+    let rex = add_reg_extension(dst, rex);
+
+    let dst_mod = dst as u8 % 8;
+    let src_mod = src as u8 % 8;
+
+    #[allow(clippy::unusual_byte_groupings)]
+    // the upper bits 0b01 of the mod_rm byte indicate 8-bit displacement
+    buf.extend([rex, 0x8d, 0b01_000_000 | (dst_mod << 3) | src_mod]);
+
+    // Using RSP or R12 as the base requires a SIB byte.
+    if src == X86_64GeneralReg::RSP || src == X86_64GeneralReg::R12 {
+        buf.push(0x24);
+    }
+
+    buf.push(offset as u8);
+}
+
 fn raw_mov_reg_reg(
     buf: &mut Vec<'_, u8>,
     register_width: RegisterWidth,
@@ -3832,6 +4364,20 @@ mod tests {
         );
     }

+    #[test]
+    fn test_jmp_reg64_offset8() {
+        disassembler_test!(
+            jmp_reg64_offset8,
+            |base, offset| if offset < 0x10 {
+                format!("jmp qword ptr [{base} + {offset:x}]")
+            } else {
+                format!("jmp qword ptr [{base} + 0x{offset:x}]")
+            },
+            ALL_GENERAL_REGS,
+            [0x8, 0x10]
+        );
+    }
+
     #[test]
     fn test_jne_imm32() {
         const INST_SIZE: i32 = 6;
@@ -3877,6 +4423,23 @@ mod tests {
         );
     }

+    #[test]
+    fn test_lea_reg64_offset8() {
+        disassembler_test!(
+            lea_reg64_offset8,
+            |dst, src, offset| {
+                if offset < 16 {
+                    format!("lea {dst}, [{src} + {offset:x}]")
+                } else {
+                    format!("lea {dst}, [{src} + 0x{offset:x}]")
+                }
+            },
+            ALL_GENERAL_REGS,
+            ALL_GENERAL_REGS,
+            [0x8i8, 0x10i8]
+        );
+    }
+
     #[test]
     fn test_mov_reg64_reg64() {
         disassembler_test!(
diff --git a/crates/compiler/gen_dev/src/lib.rs b/crates/compiler/gen_dev/src/lib.rs
index 90b2d0cf9d1..87a341d230e 100644
--- a/crates/compiler/gen_dev/src/lib.rs
+++ b/crates/compiler/gen_dev/src/lib.rs
@@ -291,6 +291,7 @@ trait Backend<'a> {
     fn interns(&self) -> &Interns;
     fn interns_mut(&mut self) -> &mut Interns;
     fn interner(&self) -> &STLayoutInterner<'a>;
+    fn relocations_mut(&mut self) -> &mut Vec<'a, Relocation>;

     fn interner_mut(&mut self) -> &mut STLayoutInterner<'a> {
         self.module_interns_helpers_mut().1
@@ -463,6 +464,11 @@ trait Backend<'a> {
     /// Used for generating wrappers for malloc/realloc/free
     fn build_wrapped_jmp(&mut self) -> (&'a [u8], u64);

+    // used by roc_panic
+    fn build_roc_setjmp(&mut self) -> &'a [u8];
+    fn build_roc_longjmp(&mut self) -> &'a [u8];
+    fn build_roc_panic(&mut self) -> (&'a [u8], Vec<'a, Relocation>);
+
     /// build_proc creates a procedure and outputs it to the wrapped object writer.
     /// Returns the procedure bytes, its relocations, and the names of the refcounting functions it references.
     fn build_proc(
@@ -1661,6 +1667,23 @@ trait Backend<'a> {
                 arg_layouts,
                 ret_layout,
             ),
+            LowLevel::SetJmp => self.build_fn_call(
+                sym,
+                String::from("roc_setjmp"),
+                args,
+                arg_layouts,
+                ret_layout,
+            ),
+            LowLevel::LongJmp => self.build_fn_call(
+                sym,
+                String::from("roc_longjmp"),
+                args,
+                arg_layouts,
+                ret_layout,
+            ),
+            LowLevel::SetLongJmpBuffer => {
+                self.build_data_pointer(sym, String::from("setlongjmp_buffer"));
+            }
             LowLevel::DictPseudoSeed => self.build_fn_call(
                 sym,
                 bitcode::UTILS_DICT_PSEUDO_SEED.to_string(),
@@ -1963,6 +1986,7 @@ trait Backend<'a> {
     );

     fn build_fn_pointer(&mut self, dst: &Symbol, fn_name: String);
+    fn build_data_pointer(&mut self, dst: &Symbol, data_name: String);

     /// Move a returned value into `dst`
     fn move_return_value(&mut self, dst: &Symbol, ret_layout: &InLayout<'a>);
diff --git a/crates/compiler/gen_dev/src/object_builder.rs b/crates/compiler/gen_dev/src/object_builder.rs
index 504d1f9ea0e..62acda1cb54 100644
--- a/crates/compiler/gen_dev/src/object_builder.rs
+++ b/crates/compiler/gen_dev/src/object_builder.rs
@@ -74,6 +74,23 @@ pub fn build_module<'a, 'r>(
                 ),
             )
         }
+        Triple {
+            architecture: TargetArch::X86_64,
+            binary_format: TargetBF::Coff,
+            ..
+        } if cfg!(feature = "target-x86_64") => {
+            let backend = new_backend_64bit::<
+                x86_64::X86_64GeneralReg,
+                x86_64::X86_64FloatReg,
+                x86_64::X86_64Assembler,
+                x86_64::X86_64WindowsFastcall,
+            >(env, TargetInfo::default_x86_64(), interns, layout_interner);
+            build_object(
+                procedures,
+                backend,
+                Object::new(BinaryFormat::Coff, Architecture::X86_64, Endianness::Little),
+            )
+        }
         Triple {
             architecture: TargetArch::Aarch64(_),
             binary_format: TargetBF::Elf,
             ..
@@ -118,6 +135,111 @@
     }
 }

+fn define_setlongjmp_buffer(output: &mut Object) -> SymbolId {
+    let bss_section = output.section_id(StandardSection::Data);
+
+    // 8 registers + 3 words for a RocStr
+    // TODO 50 is the wrong size here, look at implementation and put correct value in here
+    const SIZE: usize = (8 + 50) * core::mem::size_of::<u64>();
+
+    let symbol = Symbol {
+        name: b"setlongjmp_buffer".to_vec(),
+        value: 0,
+        size: SIZE as u64,
+        kind: SymbolKind::Data,
+        scope: SymbolScope::Dynamic,
+        weak: false,
+        section: SymbolSection::Section(bss_section),
+        flags: SymbolFlags::None,
+    };
+
+    let symbol_id = output.add_symbol(symbol);
+    output.add_symbol_data(symbol_id, bss_section, &[0x00; SIZE], 8);
+
+    symbol_id
+}
+
+fn generate_setjmp<'a, B: Backend<'a>>(backend: &mut B, output: &mut Object) {
+    let text_section = output.section_id(StandardSection::Text);
+    let proc_symbol = Symbol {
+        name: b"roc_setjmp".to_vec(),
+        value: 0,
+        size: 0,
+        kind: SymbolKind::Text,
+        scope: SymbolScope::Dynamic,
+        weak: false,
+        section: SymbolSection::Section(text_section),
+        flags: SymbolFlags::None,
+    };
+    let proc_id = output.add_symbol(proc_symbol);
+    let proc_data = backend.build_roc_setjmp();
+
+    output.add_symbol_data(proc_id, text_section, proc_data, 16);
+}
+
+fn generate_longjmp<'a, B: Backend<'a>>(backend: &mut B, output: &mut Object) {
+    let text_section = output.section_id(StandardSection::Text);
+    let proc_symbol = Symbol {
+        name: b"roc_longjmp".to_vec(),
+        value: 0,
+        size: 0,
+        kind: SymbolKind::Text,
+        scope: SymbolScope::Dynamic,
+        weak: false,
+        section: SymbolSection::Section(text_section),
+        flags: SymbolFlags::None,
+    };
+    let proc_id = output.add_symbol(proc_symbol);
+    let proc_data = backend.build_roc_longjmp();
+
+    output.add_symbol_data(proc_id, text_section, proc_data, 16);
+}
+
+// a roc_panic to be used in
tests; relies on setjmp/longjmp +fn generate_roc_panic<'a, B: Backend<'a>>(backend: &mut B, output: &mut Object) { + let text_section = output.section_id(StandardSection::Text); + let proc_symbol = Symbol { + name: b"roc_panic".to_vec(), + value: 0, + size: 0, + kind: SymbolKind::Text, + scope: SymbolScope::Dynamic, + weak: false, + section: SymbolSection::Section(text_section), + flags: SymbolFlags::None, + }; + let proc_id = output.add_symbol(proc_symbol); + let (proc_data, relocs) = backend.build_roc_panic(); + + let proc_offset = output.add_symbol_data(proc_id, text_section, proc_data, 16); + + for r in relocs { + let relocation = match r { + Relocation::LinkedData { offset, name } => { + if let Some(sym_id) = output.symbol_id(name.as_bytes()) { + write::Relocation { + offset: offset + proc_offset, + size: 32, + kind: RelocationKind::GotRelative, + encoding: RelocationEncoding::Generic, + symbol: sym_id, + addend: -4, + } + } else { + internal_error!("failed to find data symbol for {:?}", name); + } + } + Relocation::LocalData { .. } + | Relocation::LinkedFunction { .. } + | Relocation::JmpToReturn { .. } => { + unreachable!("not currently created by build_roc_panic") + } + }; + + output.add_relocation(text_section, relocation).unwrap(); + } +} + fn generate_wrapper<'a, B: Backend<'a>>( backend: &mut B, output: &mut Object, @@ -190,6 +312,12 @@ fn build_object<'a, B: Backend<'a>>( ); */ + define_setlongjmp_buffer(&mut output); + + generate_roc_panic(&mut backend, &mut output); + generate_setjmp(&mut backend, &mut output); + generate_longjmp(&mut backend, &mut output); + if backend.env().mode.generate_allocators() { generate_wrapper( &mut backend, @@ -209,12 +337,7 @@ fn build_object<'a, B: Backend<'a>>( "roc_dealloc".into(), "free".into(), ); - generate_wrapper( - &mut backend, - &mut output, - "roc_panic".into(), - "roc_builtins.utils.test_panic".into(), - ); + // Extra symbols only required on unix systems. 
if matches!(output.format(), BinaryFormat::Elf | BinaryFormat::MachO) { generate_wrapper( @@ -230,6 +353,15 @@ fn build_object<'a, B: Backend<'a>>( "roc_shm_open".into(), "shm_open".into(), ); + } else if matches!(output.format(), BinaryFormat::Coff) { + // TODO figure out why this symbol is required, it should not be required + // Without this it does not build on Windows + generate_wrapper( + &mut backend, + &mut output, + "roc_getppid".into(), + "malloc".into(), + ); } } @@ -245,6 +377,18 @@ fn build_object<'a, B: Backend<'a>>( let exposed_proc = build_exposed_proc(&mut backend, &proc); let exposed_generic_proc = build_exposed_generic_proc(&mut backend, &proc); + let (module_id, layout_interner, interns, code_gen_help, _) = + backend.module_interns_helpers_mut(); + + let ident_ids = interns.all_ident_ids.get_mut(&module_id).unwrap(); + + let test_helper = roc_mono::code_gen_help::test_helper( + code_gen_help, + ident_ids, + layout_interner, + &proc, + ); + #[cfg(debug_assertions)] { let module_id = exposed_generic_proc.name.name().module_id(); @@ -256,6 +400,18 @@ fn build_object<'a, B: Backend<'a>>( module_id.register_debug_idents(ident_ids); } + // println!("{}", test_helper.to_pretty(backend.interner(), 200, true)); + + build_proc_symbol( + &mut output, + &mut layout_ids, + &mut procs, + &mut backend, + layout, + test_helper, + Exposed::TestMain, + ); + build_proc_symbol( &mut output, &mut layout_ids, @@ -535,6 +691,7 @@ enum Exposed { ExposedGeneric, Exposed, NotExposed, + TestMain, } fn build_proc_symbol<'a, B: Backend<'a>>( @@ -567,6 +724,7 @@ fn build_proc_symbol<'a, B: Backend<'a>>( None, layout.result, ), + Exposed::TestMain => String::from("test_main"), }; let proc_symbol = Symbol { @@ -577,7 +735,7 @@ fn build_proc_symbol<'a, B: Backend<'a>>( // TODO: Depending on whether we are building a static or dynamic lib, this should change. // We should use Dynamic -> anyone, Linkage -> static link, Compilation -> this module only. 
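+        // TestMain is given Dynamic scope below so the test harness can resolve `test_main` through libloading.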
         scope: match exposed {
-            Exposed::ExposedGeneric | Exposed::Exposed => SymbolScope::Dynamic,
+            Exposed::ExposedGeneric | Exposed::Exposed | Exposed::TestMain => SymbolScope::Dynamic,
             Exposed::NotExposed => SymbolScope::Linkage,
         },
         weak: false,
diff --git a/crates/compiler/mono/src/code_gen_help/mod.rs b/crates/compiler/mono/src/code_gen_help/mod.rs
index 861aaec471e..f0587fa70d9 100644
--- a/crates/compiler/mono/src/code_gen_help/mod.rs
+++ b/crates/compiler/mono/src/code_gen_help/mod.rs
@@ -6,8 +6,8 @@ use roc_module::symbol::{IdentIds, ModuleId, Symbol};
 use roc_target::TargetInfo;

 use crate::ir::{
-    Call, CallSpecId, CallType, Expr, HostExposedLayouts, JoinPointId, ModifyRc, PassedFunction,
-    Proc, ProcLayout, SelfRecursive, Stmt, UpdateModeId,
+    BranchInfo, Call, CallSpecId, CallType, Expr, HostExposedLayouts, JoinPointId, Literal,
+    ModifyRc, PassedFunction, Proc, ProcLayout, SelfRecursive, Stmt, UpdateModeId,
 };
 use crate::layout::{
     Builtin, InLayout, LambdaName, Layout, LayoutInterner, LayoutRepr, LayoutWrapper, Niche,
@@ -848,3 +848,224 @@ fn layout_needs_helper_proc<'a>(
         LayoutRepr::Erased(_) => true,
     }
 }
+
+pub fn test_helper<'a>(
+    env: &CodeGenHelp<'a>,
+    ident_ids: &mut IdentIds,
+    layout_interner: &mut STLayoutInterner<'a>,
+    main_proc: &Proc<'a>,
+) -> Proc<'a> {
+    let name = LambdaName::no_niche(env.create_symbol(ident_ids, "test_main"));
+
+    let it = (0..main_proc.args.len()).map(|i| env.create_symbol(ident_ids, &format!("arg_{i}")));
+    let arguments = Vec::from_iter_in(it, env.arena).into_bump_slice();
+
+    let it = arguments
+        .iter()
+        .zip(main_proc.args.iter())
+        .map(|(s, (l, _))| (*l, *s));
+    let args = Vec::from_iter_in(it, env.arena).into_bump_slice();
+
+    // tag: u64,
+    // error_msg: *mut RocStr,
+    // value: MaybeUninit<T>,
+    let fields = [Layout::U64, Layout::U64, main_proc.ret_layout];
+    let repr = LayoutRepr::Struct(env.arena.alloc(fields));
+    let output_layout = layout_interner.insert_direct_no_semantic(repr);
+    let body = test_helper_body(
+        env,
+        ident_ids,
+        layout_interner,
+        main_proc,
+        arguments,
+        output_layout,
+    );
+
+    Proc {
+        name,
+        args,
+        body,
+        closure_data_layout: None,
+        ret_layout: output_layout,
+        is_self_recursive: main_proc.is_self_recursive,
+        host_exposed_layouts: HostExposedLayouts::HostExposed {
+            rigids: Default::default(),
+            aliases: Default::default(),
+        },
+        is_erased: false,
+    }
+}
+
+fn test_helper_body<'a>(
+    env: &CodeGenHelp<'a>,
+    ident_ids: &mut IdentIds,
+    layout_interner: &mut STLayoutInterner<'a>,
+    main_proc: &Proc<'a>,
+    arguments: &'a [Symbol],
+    output_layout: InLayout<'a>,
+) -> Stmt<'a> {
+    // let buffer = SetLongJmpBuffer
+    let buffer_symbol = env.create_symbol(ident_ids, "buffer");
+    let buffer_expr = Expr::Call(Call {
+        call_type: CallType::LowLevel {
+            op: LowLevel::SetLongJmpBuffer,
+            update_mode: UpdateModeId::BACKEND_DUMMY,
+        },
+        arguments: &[],
+    });
+    let buffer_stmt = |next| Stmt::Let(buffer_symbol, buffer_expr, Layout::U64, next);
+
+    let field_layouts = env.arena.alloc([Layout::U64, Layout::U64]);
+    let ret_layout = layout_interner.insert_direct_no_semantic(LayoutRepr::Struct(field_layouts));
+
+    let setjmp_symbol = env.create_symbol(ident_ids, "setjmp");
+    let setjmp_expr = Expr::Call(Call {
+        call_type: CallType::LowLevel {
+            op: LowLevel::SetJmp,
+            update_mode: UpdateModeId::BACKEND_DUMMY,
+        },
+        arguments: env.arena.alloc([buffer_symbol]),
+    });
+    let setjmp_stmt = |next| Stmt::Let(setjmp_symbol, setjmp_expr, ret_layout, next);
+
+    let is_longjmp_symbol = env.create_symbol(ident_ids, "is_longjmp");
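+    // the first word of the setjmp result: zero on the direct return; after a longjmp it
+    // holds a pointer to the error message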
+ let is_longjmp_expr = Expr::StructAtIndex { + index: 0, + field_layouts, + structure: setjmp_symbol, + }; + let is_longjmp_stmt = |next| Stmt::Let(is_longjmp_symbol, is_longjmp_expr, Layout::U64, next); + + let tag_symbol = env.create_symbol(ident_ids, "tag"); + let tag_expr = Expr::StructAtIndex { + index: 1, + field_layouts, + structure: setjmp_symbol, + }; + let tag_stmt = |next| Stmt::Let(tag_symbol, tag_expr, Layout::U64, next); + + // normal path, no panics + let if_zero_stmt = { + let it = main_proc.args.iter().map(|(a, _)| *a); + let arg_layouts = Vec::from_iter_in(it, env.arena).into_bump_slice(); + + let result_symbol = env.create_symbol(ident_ids, "result"); + let result_expr = Expr::Call(Call { + call_type: CallType::ByName { + name: main_proc.name, + ret_layout: main_proc.ret_layout, + arg_layouts, + specialization_id: CallSpecId::BACKEND_DUMMY, + }, + arguments, + }); + let result = |next| Stmt::Let(result_symbol, result_expr, main_proc.ret_layout, next); + + let ok_tag_symbol = env.create_symbol(ident_ids, "ok_tag"); + let ok_tag_expr = Expr::Literal(Literal::Int((0i128).to_ne_bytes())); + let ok_tag = |next| Stmt::Let(ok_tag_symbol, ok_tag_expr, Layout::U64, next); + + let msg_ptr_symbol = env.create_symbol(ident_ids, "msg_ptr"); + let msg_ptr_expr = Expr::Literal(Literal::Int((0i128).to_ne_bytes())); + let msg_ptr = |next| Stmt::Let(msg_ptr_symbol, msg_ptr_expr, Layout::U64, next); + + // construct the record + let output_symbol = env.create_symbol(ident_ids, "output_ok"); + let fields = [ok_tag_symbol, msg_ptr_symbol, result_symbol]; + let output_expr = Expr::Struct(env.arena.alloc(fields)); + let output = |next| Stmt::Let(output_symbol, output_expr, output_layout, next); + + let arena = env.arena; + result(arena.alloc( + // + ok_tag(arena.alloc( + // + msg_ptr(arena.alloc( + // + output(arena.alloc( + // + Stmt::Ret(output_symbol), + )), + )), + )), + )) + }; + + // a longjmp/panic occurred + let if_nonzero_stmt = { + let alloca_symbol = env.create_symbol(ident_ids, "alloca"); + let alloca_expr = Expr::Alloca { + element_layout: main_proc.ret_layout, + initializer: None, + }; + let alloca = |next| Stmt::Let(alloca_symbol, alloca_expr, Layout::U64, next); + + let load_symbol = env.create_symbol(ident_ids, "load"); + let load_expr = Expr::Call(Call { + call_type: CallType::LowLevel { + op: LowLevel::PtrLoad, + update_mode: UpdateModeId::BACKEND_DUMMY, + }, + arguments: env.arena.alloc([alloca_symbol]), + }); + let load = |next| Stmt::Let(load_symbol, load_expr, main_proc.ret_layout, next); + + // construct the record + let output_symbol = env.create_symbol(ident_ids, "output_err"); + // is_longjmp_symbol is a pointer to the error message + let fields = [tag_symbol, is_longjmp_symbol, load_symbol]; + let output_expr = Expr::Struct(env.arena.alloc(fields)); + let output = |next| Stmt::Let(output_symbol, output_expr, output_layout, next); + + let arena = env.arena; + arena.alloc(alloca(arena.alloc( + // + load(arena.alloc( + // + output(arena.alloc( + // + Stmt::Ret(output_symbol), + )), + )), + ))) + }; + + buffer_stmt(env.arena.alloc( + // + setjmp_stmt(env.arena.alloc( + // + is_longjmp_stmt(env.arena.alloc( + // + tag_stmt(env.arena.alloc( + // + switch_if_zero_else( + env.arena, + is_longjmp_symbol, + output_layout, + if_zero_stmt, + if_nonzero_stmt, + ), + )), + )), + )), + )) +} + +fn switch_if_zero_else<'a>( + arena: &'a Bump, + condition_symbol: Symbol, + return_layout: InLayout<'a>, + then_branch_stmt: Stmt<'a>, + else_branch_stmt: &'a Stmt<'a>, +) -> Stmt<'a> { + 
    let then_branch = (0u64, BranchInfo::None, then_branch_stmt);
+    let else_branch = (BranchInfo::None, else_branch_stmt);
+
+    Stmt::Switch {
+        cond_symbol: condition_symbol,
+        cond_layout: Layout::U64,
+        branches: &*arena.alloc([then_branch]),
+        default_branch: else_branch,
+        ret_layout: return_layout,
+    }
+}
diff --git a/crates/compiler/test_gen/src/gen_primitives.rs b/crates/compiler/test_gen/src/gen_primitives.rs
index 7c51ad5c8f8..8afd1e9ddd3 100644
--- a/crates/compiler/test_gen/src/gen_primitives.rs
+++ b/crates/compiler/test_gen/src/gen_primitives.rs
@@ -992,6 +992,24 @@ fn undefined_variable() {
     );
 }

+#[test]
+#[cfg(any(feature = "gen-llvm", feature = "gen-wasm", feature = "gen-dev"))]
+#[should_panic(expected = "User crash with message: \"a crash\"")]
+fn a_crash() {
+    assert_evals_to!(
+        indoc!(
+            r#"
+            if Bool.true then
+                crash "a crash"
+            else
+                0u64
+            "#
+        ),
+        3,
+        i64
+    );
+}
+
 #[test]
 #[cfg(any(feature = "gen-llvm", feature = "gen-wasm"))]
 #[should_panic(expected = "Roc failed with message: ")]
diff --git a/crates/compiler/test_gen/src/helpers/dev.rs b/crates/compiler/test_gen/src/helpers/dev.rs
index fd94e6e87fa..e2238b2703e 100644
--- a/crates/compiler/test_gen/src/helpers/dev.rs
+++ b/crates/compiler/test_gen/src/helpers/dev.rs
@@ -2,10 +2,13 @@ use libloading::Library;
 use roc_build::link::{link, LinkType};
 use roc_builtins::bitcode;
 use roc_load::{EntryPoint, ExecutionMode, LoadConfig, Threading};
+use roc_mono::ir::CrashTag;
 use roc_mono::ir::SingleEntryPoint;
 use roc_packaging::cache::RocCacheDir;
 use roc_region::all::LineInfo;
 use roc_solve::FunctionKind;
+use roc_std::RocStr;
+use std::mem::MaybeUninit;
 use tempfile::tempdir;

 #[cfg(any(feature = "gen-llvm", feature = "gen-wasm"))]
@@ -212,8 +215,10 @@ pub fn helper(
     let builtins_host_tempfile =
         roc_bitcode::host_tempfile().expect("failed to write host builtins object to tempfile");

+    // TODO make this an environment variable
     if false {
-        std::fs::copy(&app_o_file, "/tmp/app.o").unwrap();
+        let file_path = std::env::temp_dir().join("app.o");
+        std::fs::copy(&app_o_file, file_path).unwrap();
     }

     let (mut child, dylib_path) = link(
@@ -245,6 +250,81 @@ pub fn helper(
     (main_fn_name, delayed_errors, lib)
 }

+#[derive(Debug)]
+#[repr(C)]
+pub struct RocCallResult<T> {
+    pub tag: u64,
+    pub error_msg: *mut RocStr,
+    pub value: MaybeUninit<T>,
+}
+
+impl<T> RocCallResult<T> {
+    pub fn new(value: T) -> Self {
+        Self {
+            tag: 0,
+            error_msg: std::ptr::null_mut(),
+            value: MaybeUninit::new(value),
+        }
+    }
+}
+
+impl<T: Default> Default for RocCallResult<T> {
+    fn default() -> Self {
+        Self {
+            tag: 0,
+            error_msg: std::ptr::null_mut(),
+            value: MaybeUninit::new(Default::default()),
+        }
+    }
+}
+
+impl<T> RocCallResult<T> {
+    pub fn into_result(self) -> Result<T, (String, CrashTag)> {
+        match self.tag {
+            0 => Ok(unsafe { self.value.assume_init() }),
+            n => Err({
+                let mut msg = RocStr::default();
+
+                unsafe { std::ptr::swap(&mut msg, self.error_msg) };
+
+                let tag = (n - 1) as u32;
+                let tag = tag
+                    .try_into()
+                    .unwrap_or_else(|_| panic!("received illegal tag: {tag} {msg}"));
+
+                (msg.as_str().to_owned(), tag)
+            }),
+        }
+    }
+}
+
+fn get_test_main_fn<T>(
+    lib: &libloading::Library,
+) -> libloading::Symbol<unsafe extern "C" fn() -> RocCallResult<T>> {
+    let main_fn_name = "test_main";
+
+    unsafe {
+        lib.get(main_fn_name.as_bytes())
+            .ok()
+            .ok_or(format!("Unable to JIT compile `{main_fn_name}`"))
+            .expect("errored")
+    }
+}
+
+pub(crate) fn run_test_main<T>(lib: &libloading::Library) -> Result<T, (String, CrashTag)> {
+    let main = get_test_main_fn::<T>(lib);
+
+    let result = unsafe { main() };
+
+    result.into_result()
+}
+
+impl<T> From<RocCallResult<T>> for Result<T, (String, CrashTag)> {
+    fn from(call_result: RocCallResult<T>) -> Self {
+        call_result.into_result()
+    }
+}
+
 #[allow(unused_macros)]
 macro_rules! assert_evals_to {
     ($src:expr, $expected:expr, $ty:ty) => {{
@@ -267,19 +347,40 @@ macro_rules! assert_evals_to {
     };
     ($src:expr, $expected:expr, $ty:ty, $transform:expr, $leak:expr, $lazy_literals:expr) => {
         use bumpalo::Bump;
-        use roc_gen_dev::run_jit_function_raw;

         let arena = Bump::new();
-        let (main_fn_name, errors, lib) =
+        let (_main_fn_name, errors, lib) =
             $crate::helpers::dev::helper(&arena, $src, $leak, $lazy_literals);

-        let transform = |success| {
-            let expected = $expected;
-            #[allow(clippy::redundant_closure_call)]
-            let given = $transform(success);
-            assert_eq!(&given, &expected);
-        };
-        run_jit_function_raw!(lib, main_fn_name, $ty, transform, errors)
+        let result = $crate::helpers::dev::run_test_main::<$ty>(&lib);
+
+        if !errors.is_empty() {
+            dbg!(&errors);
+
+            assert_eq!(
+                errors,
+                std::vec::Vec::new(),
+                "Encountered errors: {:?}",
+                errors
+            );
+        }
+
+        match result {
+            Ok(value) => {
+                let expected = $expected;
+                #[allow(clippy::redundant_closure_call)]
+                let given = $transform(value);
+                assert_eq!(&given, &expected, "output is different");
+            }
+            Err((msg, tag)) => {
+                use roc_mono::ir::CrashTag;
+
+                match tag {
+                    CrashTag::Roc => panic!(r#"Roc failed with message: "{msg}""#),
+                    CrashTag::User => panic!(r#"User crash with message: "{msg}""#),
+                }
+            }
+        }
    };
}