From 6cdf3cfa64fd88f5240ed9c784586529ad24e985 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Thu, 25 Jan 2024 13:31:30 +0000 Subject: [PATCH] [all]: Rework Neon load/store and load/store pair Currently Neon load/store is split in two instructions: one for handling immediate/register offset and another for handling post-index variant. The similar applies to Neon load/store pair. Note, missing support for pre-index variant for Neon load/store. Let's refactor those instructions to take advantage of existing infrastructure for handling addressing modes. That removes need for special instruction for post-index variant and as a bonus naturally brings support for pre-index variant for Neon load/store. Signed-off-by: Vladimir Murzin --- gen/AArch64Compile_gen.ml | 18 +- herd/AArch64Arch_herd.ml | 26 ++- herd/AArch64Sem.ml | 118 +++++++----- jingle/AArch64Arch_jingle.ml | 7 +- lib/AArch64Base.ml | 165 ++++++----------- lib/AArch64Parser.mly | 98 +++------- litmus/AArch64Compile_litmus.ml | 319 +++++++++++++------------------- 7 files changed, 301 insertions(+), 450 deletions(-) diff --git a/gen/AArch64Compile_gen.ml b/gen/AArch64Compile_gen.ml index 614ae7a4c5..87fb37669b 100644 --- a/gen/AArch64Compile_gen.ml +++ b/gen/AArch64Compile_gen.ml @@ -582,7 +582,7 @@ module Make(Cfg:Config) : XXXCompile_gen.S = let emit_load_reg temporal st init rA = let r1,st = next_vreg st in let r2,st = next_vreg st in - let ldp = [I_LDP_SIMD(temporal,A64.VSIMD32,to_scalar r1,to_scalar r2,rA,0)] in + let ldp = [I_LDP_SIMD(temporal,A64.VSIMD32,to_scalar r1,to_scalar r2,rA,(0,A64.Idx))] in let r3,st = next_vreg st in let add = [I_ADD_SIMD (r3,r1,r2)] in let rX,st = next_reg st in @@ -603,7 +603,7 @@ module Make(Cfg:Config) : XXXCompile_gen.S = module LDUR = struct let emit_load_reg st init rA = let r,st = next_scalar_reg st in - let ldur = [I_LDUR_SIMD(A64.VSIMD32,r,rA,None)] in + let ldur = [I_LDUR_SIMD(A64.VSIMD32,r,rA,0)] in let rX,st = next_reg st in let fmov = [I_FMOV_TG(A64.V32,rX,A64.VSIMD32,r)] in rX,init,lift_code (ldur@fmov),st @@ -622,7 +622,7 @@ module Make(Cfg:Config) : XXXCompile_gen.S = module LDAPUR = struct let emit_load_reg st init rA = let r,st = next_scalar_reg st in - let ldur = [I_LDAPUR_SIMD(A64.VSIMD32,r,rA,None)] in + let ldur = [I_LDAPUR_SIMD(A64.VSIMD32,r,rA,0)] in let rX,st = next_reg st in let fmov = [I_FMOV_TG(A64.V32,rX,A64.VSIMD32,r)] in rX,init,lift_code (ldur@fmov),st @@ -834,7 +834,7 @@ module Make(Cfg:Config) : XXXCompile_gen.S = let r1,st = next_vreg st in let r2,st = next_vreg st in let movi = List.mapi (fun i r -> movi_reg r (v+i)) [r1;r2] in - let stp = [I_STP_SIMD(temporal,A64.VSIMD32,to_scalar r1,to_scalar r2,rA,0)] in + let stp = [I_STP_SIMD(temporal,A64.VSIMD32,to_scalar r1,to_scalar r2,rA,(0,A64.Idx))] in init,pseudo movi@pseudo stp,st let emit_store n st p init loc v = @@ -854,7 +854,7 @@ module Make(Cfg:Config) : XXXCompile_gen.S = let r2,st = next_vreg st in let movi = List.mapi (fun i r -> movi_reg r (v+i)) [r1;r2] in let adds = List.map (fun v -> add_simd v rB) [r1;r2] in - let stp = [I_STP_SIMD(temporal,A64.VSIMD32,to_scalar r1,to_scalar r2,rA,0)] in + let stp = [I_STP_SIMD(temporal,A64.VSIMD32,to_scalar r1,to_scalar r2,rA,(0,A64.Idx))] in init,lift_code(dup@movi@adds@stp),st end @@ -862,7 +862,7 @@ module Make(Cfg:Config) : XXXCompile_gen.S = let emit_store_reg st init rA v = let r,st = next_vreg st in let movi = [movi_reg r v] in - let stur = [I_STUR_SIMD(A64.VSIMD32,to_scalar r,rA,None)] in + let stur = [I_STUR_SIMD(A64.VSIMD32,to_scalar r,rA,0)] in init,lift_code(movi@stur),st let emit_store st p init loc v = @@ -881,7 +881,7 @@ module Make(Cfg:Config) : XXXCompile_gen.S = let r1,st = next_vreg st in let movi = [movi_reg r1 v] in let adds = [add_simd r1 rB]in - let stur = [I_STUR_SIMD(A64.VSIMD32,to_scalar r1,rA,None)] in + let stur = [I_STUR_SIMD(A64.VSIMD32,to_scalar r1,rA,0)] in init,lift_code(dup@movi@adds@stur),st end @@ -889,7 +889,7 @@ module Make(Cfg:Config) : XXXCompile_gen.S = let emit_store_reg st init rA v = let r,st = next_vreg st in let movi = [movi_reg r v] in - let stlur = [I_STLUR_SIMD(A64.VSIMD32,to_scalar r,rA,None)] in + let stlur = [I_STLUR_SIMD(A64.VSIMD32,to_scalar r,rA,0)] in init,lift_code(movi@stlur),st let emit_store st p init loc v = @@ -908,7 +908,7 @@ module Make(Cfg:Config) : XXXCompile_gen.S = let r1,st = next_vreg st in let movi = [movi_reg r1 v] in let adds = [add_simd r1 rB]in - let stlur = [I_STLUR_SIMD(A64.VSIMD32,to_scalar r1,rA,None)] in + let stlur = [I_STLUR_SIMD(A64.VSIMD32,to_scalar r1,rA,0)] in init,lift_code(dup@movi@adds@stlur),st end diff --git a/herd/AArch64Arch_herd.ml b/herd/AArch64Arch_herd.ml index bd09310b56..7c75f1c5a0 100644 --- a/herd/AArch64Arch_herd.ml +++ b/herd/AArch64Arch_herd.ml @@ -90,16 +90,16 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) = | I_EOR_SIMD _| I_ERET| I_FENCE _| I_GC _| I_IC _| I_LD1 _| I_LD1M _| I_LD1R _ | I_LDAP1 _ | I_LD2 _| I_LD2M _| I_LD2R _| I_LD3 _| I_LD3M _| I_LD3R _| I_LD4 _| I_LD4M _ | I_LD4R _| I_LDAR _| I_LDARBH _| I_LDCT _| I_LDG _| I_LDOP _| I_LDOPBH _ - | I_LDP _| I_LDP_P_SIMD _| I_LDP_SIMD _| I_LDPSW _| I_LDR _ - | I_LDRSW _ | I_LDR_P_SIMD _ | I_LDAPUR_SIMD _ - | I_LDR_SIMD _| I_LDRBH _| I_LDRS _| I_LDUR _| I_LDUR_SIMD _| I_LDXP _| I_MOV _ | I_FMOV_TG _ + | I_LDP _| I_LDP_SIMD _| I_LDPSW _| I_LDR _ + | I_LDRSW _ | I_LDR_SIMD _ | I_LDAPUR_SIMD _ + | I_LDRBH _| I_LDRS _| I_LDUR _| I_LDUR_SIMD _| I_LDXP _| I_MOV _ | I_FMOV_TG _ | I_ADDV _| I_DUP _ | I_MOV_FG _| I_MOV_S _| I_MOV_TG _| I_MOV_V _| I_MOV_VE _| I_MOVI_S _ | I_MOVI_V _| I_MOVK _| I_MOVZ _| I_MOVN _| I_MRS _| I_MSR _| I_OP3 _| I_RBIT _ | I_RET _ | I_SBFM _| I_SC _| I_SEAL _| I_ST1 _| I_STL1 _| I_ST1M _| I_ST2 _| I_ST2M _| I_ST3 _ | I_ST3M _| I_ST4 _| I_ST4M _| I_STCT _| I_STG _| I_STLR _| I_STLRBH _| I_STOP _ - | I_STOPBH _| I_STP _| I_STP_P_SIMD _| I_STP_SIMD _| I_STR _ | I_STLUR_SIMD _ - | I_STR_P_SIMD _| I_STR_SIMD _| I_STRBH _| I_STUR_SIMD _| I_STXP _| I_STXR _ + | I_STOPBH _| I_STP _| I_STP_SIMD _| I_STR _ | I_STLUR_SIMD _ + | I_STR_SIMD _| I_STRBH _| I_STUR_SIMD _| I_STXP _| I_STXR _ | I_STXRBH _| I_STZG _| I_STZ2G _ | I_SWP _| I_SWPBH _| I_SXTW _| I_TLBI _| I_UBFM _ | I_UDF _| I_UNSEAL _ | I_ADDSUBEXT _ | I_ABS _ | I_REV _ | I_EXTR _ @@ -229,10 +229,10 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) = | I_LDOP (_,v,_,_,_,_) | I_STOP (_,v,_,_,_) -> Some (tr_variant v) | I_STZG _|I_STZ2G _ -> Some MachSize.granule - | I_LDR_SIMD (v,_,_,_,_) | I_LDR_P_SIMD (v,_,_,_) - | I_LDP_SIMD (_,v,_,_,_,_) | I_LDP_P_SIMD (_,v,_,_,_,_) - | I_STR_SIMD (v,_,_,_,_) | I_STR_P_SIMD (v,_,_,_) - | I_STP_SIMD (_,v,_,_,_,_) | I_STP_P_SIMD (_,v,_,_,_,_) + | I_LDR_SIMD (v,_,_,_) + | I_LDP_SIMD (_,v,_,_,_,_) + | I_STR_SIMD (v,_,_,_) + | I_STP_SIMD (_,v,_,_,_,_) | I_LDUR_SIMD (v,_,_,_) | I_STUR_SIMD (v,_,_,_) | I_LDAPUR_SIMD (v,_,_,_) | I_STLUR_SIMD (v,_,_,_) -> Some (tr_simd_variant v) @@ -344,10 +344,8 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) = | I_LD3R _|I_LD4 _|I_LD4M _|I_LD4R _ | I_ST1 _|I_ST1M _|I_ST2 _|I_ST2M _ | I_ST3 _|I_ST3M _|I_ST4 _|I_ST4M _ - | I_LDP_P_SIMD _|I_STP_P_SIMD _ | I_LDP_SIMD _|I_STP_SIMD _ - | I_LDR_SIMD _|I_LDR_P_SIMD _ - | I_STR_SIMD _|I_STR_P_SIMD _ + | I_LDR_SIMD _|I_STR_SIMD _ | I_LDUR_SIMD _|I_LDAPUR_SIMD _|I_STUR_SIMD _|I_STLUR_SIMD _ | I_MOV_VE _ | I_MOV_V _|I_MOV_TG _|I_MOV_FG _ @@ -376,10 +374,8 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) = | I_LD4 _|I_LD4M _|I_LD4R _|I_ST1 _|I_STL1 _ | I_ST1M _|I_ST2 _|I_ST2M _|I_ST3 _ | I_ST3M _|I_ST4 _|I_ST4M _ - | I_LDP_P_SIMD _|I_STP_P_SIMD _ | I_LDP_SIMD _|I_STP_SIMD _ - | I_LDR_SIMD _|I_LDR_P_SIMD _ - | I_STR_SIMD _|I_STR_P_SIMD _ + | I_LDR_SIMD _|I_STR_SIMD _ | I_LDUR_SIMD _|I_LDAPUR_SIMD _|I_STUR_SIMD _|I_STLUR_SIMD _ | I_MOV_VE _ | I_MOV_V _|I_MOV_TG _|I_MOV_FG _ diff --git a/herd/AArch64Sem.ml b/herd/AArch64Sem.ml index 977aaa28d7..4db24525dc 100644 --- a/herd/AArch64Sem.ml +++ b/herd/AArch64Sem.ml @@ -1890,9 +1890,9 @@ module Make let simd_ldr = do_simd_ldr Annot.N let simd_ldar = do_simd_ldr Annot.Q - let do_simd_str an sz rs rd kr s ii = - get_ea rs kr s ii >>| - read_reg_neon true rd ii >>= fun (addr, v) -> + let do_simd_str an sz ma rd ii = + ma >>| + read_reg_neon true rd ii >>= fun (addr,v) -> if sz == MachSize.S128 then do_write_mem_2_ops sz an aexp Access.VIR addr v ii >>= B.next2T else @@ -1901,9 +1901,9 @@ module Make let simd_str = do_simd_str Annot.N let simd_stlr = do_simd_str Annot.L - let simd_str_p sz rs rd k ii = - read_reg_ord rs ii >>| - read_reg_neon true rd ii >>= fun (addr, v) -> + let simd_str_p sz ma rd rs k ii = + ma >>| + read_reg_neon true rd ii >>= fun (addr,v) -> if sz == MachSize.S128 then (* 128-bit Neon LDR/STR and friends are split into two 64-bit * single-copy atomic accesses. *) @@ -2625,57 +2625,85 @@ module Make !!!!(read_reg_ord rA ii >>= fun addr -> (store_m addr rs ii >>| post_kr rA addr kr ii)) - - | I_LDR_SIMD(var,r1,rA,kr,s) -> + | I_LDR_SIMD(var,r1,rA,MemExt.Reg(v,kr,sext,s)) -> + let access_size = tr_simd_variant var in + get_ea_reg rA v kr sext s ii >>= fun addr -> + simd_ldr access_size addr r1 ii >>= B.next1T + | I_LDR_SIMD(var,r1,rA,MemExt.Imm (k,Idx)) -> + let access_size = tr_simd_variant var in + get_ea_idx rA k ii >>= fun addr -> + simd_ldr access_size addr r1 ii >>= B.next1T + | I_LDR_SIMD(var,r1,rA,MemExt.Imm (k,PreIdx)) -> let access_size = tr_simd_variant var in - get_ea rA kr s ii >>= fun addr -> + get_ea_preindexed rA k ii >>= fun addr -> simd_ldr access_size addr r1 ii >>= B.next1T - | I_LDR_P_SIMD(var,r1,rA,k) -> + | I_LDR_SIMD(var,r1,rA,MemExt.Imm (k,PostIdx)) -> let access_size = tr_simd_variant var in read_reg_ord rA ii >>= fun addr -> simd_ldr access_size addr r1 ii >>| post_kr rA addr (K k) ii >>= B.next2T | I_LDUR_SIMD(var,r1,rA,k) -> - let access_size = tr_simd_variant var and - k = K (match k with Some k -> k | None -> 0) in - (get_ea rA k S_NOEXT ii >>= fun addr -> - simd_ldr access_size addr r1 ii) >>= B.next1T + let access_size = tr_simd_variant var in + get_ea rA (K k) S_NOEXT ii >>= fun addr -> + simd_ldr access_size addr r1 ii >>= B.next1T | I_LDAPUR_SIMD(var,r1,rA,k) -> - let access_size = tr_simd_variant var and - k = K (match k with Some k -> k | None -> 0) in - (get_ea rA k S_NOEXT ii >>= fun addr -> - simd_ldar access_size addr r1 ii) >>= B.next1T - | I_STR_SIMD(var,r1,rA,kr,s) -> let access_size = tr_simd_variant var in - simd_str access_size rA r1 kr s ii - | I_STR_P_SIMD(var,r1,rA,k) -> + get_ea rA (K k) S_NOEXT ii >>= fun addr -> + simd_ldar access_size addr r1 ii >>= B.next1T + | I_STR_SIMD(var,r1,rA,MemExt.Reg (v,kr,sext,s)) -> + let access_size = tr_simd_variant var in + let ma = get_ea_reg rA v kr sext s ii in + simd_str access_size ma r1 ii + | I_STR_SIMD(var,r1,rA,MemExt.Imm (k,Idx)) -> + let access_size = tr_simd_variant var in + let ma = get_ea_idx rA k ii in + simd_str access_size ma r1 ii + | I_STR_SIMD(var,r1,rA,MemExt.Imm (k,PreIdx)) -> let access_size = tr_simd_variant var in - simd_str_p access_size rA r1 (K k) ii + let ma = get_ea_preindexed rA k ii in + simd_str access_size ma r1 ii + | I_STR_SIMD(var,r1,rA,MemExt.Imm (k,PostIdx)) -> + let access_size = tr_simd_variant var in + let ma = read_reg_ord rA ii in + simd_str_p access_size ma r1 rA (K k) ii | I_STUR_SIMD(var,r1,rA,k) -> - let access_size = tr_simd_variant var and - k = K (match k with Some k -> k | None -> 0) in - simd_str access_size rA r1 k S_NOEXT ii + let access_size = tr_simd_variant var in + let ma = get_ea_idx rA k ii in + simd_str access_size ma r1 ii | I_STLUR_SIMD(var,r1,rA,k) -> - let access_size = tr_simd_variant var and - k = K (match k with Some k -> k | None -> 0) in - simd_stlr access_size rA r1 k S_NOEXT ii - | I_LDP_SIMD(tnt,var,r1,r2,r3,k) -> - get_ea_idx r3 k ii >>= fun addr -> - simd_ldp tnt var addr r1 r2 ii - | I_LDP_P_SIMD(tnt,var,r1,r2,r3,k) -> - read_reg_ord r3 ii >>= fun addr -> - (simd_ldp tnt var addr r1 r2 ii >>| - post_kr r3 addr (K k) ii) >>= - fun (b,()) -> M.unitT b - | I_STP_SIMD(tnt,var,r1,r2,r3,k) -> - get_ea_idx r3 k ii >>= fun addr -> - simd_stp tnt var addr r1 r2 ii - | I_STP_P_SIMD(tnt,var,r1,r2,r3,k) -> - read_reg_ord r3 ii >>= fun addr -> - simd_stp tnt var addr r1 r2 ii >>| - post_kr r3 addr (K k) ii >>= - fun (b,()) -> M.unitT b - + let access_size = tr_simd_variant var in + let ma = get_ea_idx rA k ii in + simd_stlr access_size ma r1 ii + | I_LDP_SIMD(tnt,var,r1,r2,r3,idx) -> + begin + match idx with + | k,Idx -> + get_ea_idx r3 k ii >>= fun addr -> + simd_ldp tnt var addr r1 r2 ii + | k,PreIdx -> + get_ea_preindexed r3 k ii >>= fun addr -> + simd_ldp tnt var addr r1 r2 ii + | k,PostIdx -> + read_reg_ord r3 ii >>= fun addr -> + (simd_ldp tnt var addr r1 r2 ii >>| + post_kr r3 addr (K k) ii) >>= + fun (b,()) -> M.unitT b + end + | I_STP_SIMD(tnt,var,r1,r2,r3,idx) -> + begin + match idx with + | k,Idx -> + get_ea_idx r3 k ii >>= fun addr -> + simd_stp tnt var addr r1 r2 ii + | k,PreIdx -> + get_ea_preindexed r3 k ii >>= fun addr -> + simd_stp tnt var addr r1 r2 ii + | k,PostIdx -> + read_reg_ord r3 ii >>= fun addr -> + (simd_stp tnt var addr r1 r2 ii >>| + post_kr r3 addr (K k) ii) >>= + fun (b,()) -> M.unitT b + end (* Morello instructions *) | I_ALIGND(rd,rn,k) -> check_morello inst ; diff --git a/jingle/AArch64Arch_jingle.ml b/jingle/AArch64Arch_jingle.ml index 5f870e3c95..0c8b318e14 100644 --- a/jingle/AArch64Arch_jingle.ml +++ b/jingle/AArch64Arch_jingle.ml @@ -659,10 +659,9 @@ include Arch.MakeArch(struct | I_ST2 _ | I_ST2M _ | I_ST3 _ | I_ST3M _ | I_ST4 _ | I_ST4M _ - | I_LDP_SIMD _ | I_LDP_P_SIMD _ - | I_STP_SIMD _ | I_STP_P_SIMD _ - | I_LDR_SIMD _ | I_LDR_P_SIMD _ - | I_STR_SIMD _ | I_STR_P_SIMD _ + | I_LDP_SIMD _ + | I_STP_SIMD _ + | I_LDR_SIMD _ | I_STR_SIMD _ | I_LDUR_SIMD _ | I_LDAPUR_SIMD _ | I_STUR_SIMD _ | I_STLUR_SIMD _ | I_ADDV _ | I_DUP _ | I_FMOV_TG _ diff --git a/lib/AArch64Base.ml b/lib/AArch64Base.ml index 38449f0a91..0cfccde4d3 100644 --- a/lib/AArch64Base.ml +++ b/lib/AArch64Base.ml @@ -1110,18 +1110,14 @@ type 'k kinstruction = | I_ST3M of reg list * reg * 'k kr | I_ST4 of reg list * int * reg * 'k kr | I_ST4M of reg list * reg * 'k kr - | I_LDP_P_SIMD of temporal * simd_variant * reg * reg * reg * 'k - | I_STP_P_SIMD of temporal * simd_variant * reg * reg * reg * 'k - | I_LDP_SIMD of temporal * simd_variant * reg * reg * reg * 'k - | I_STP_SIMD of temporal * simd_variant * reg * reg * reg * 'k - | I_LDR_SIMD of simd_variant * reg * reg * 'k kr * 'k s - | I_LDR_P_SIMD of simd_variant * reg * reg * 'k - | I_STR_SIMD of simd_variant * reg * reg * 'k kr * 'k s - | I_STR_P_SIMD of simd_variant * reg * reg * 'k - | I_LDUR_SIMD of simd_variant * reg * reg * 'k option - | I_LDAPUR_SIMD of simd_variant * reg * reg * 'k option - | I_STUR_SIMD of simd_variant * reg * reg * 'k option - | I_STLUR_SIMD of simd_variant * reg * reg * 'k option + | I_LDP_SIMD of temporal * simd_variant * reg * reg * reg * 'k idx + | I_STP_SIMD of temporal * simd_variant * reg * reg * reg * 'k idx + | I_LDR_SIMD of simd_variant * reg * reg * 'k MemExt.ext + | I_STR_SIMD of simd_variant * reg * reg * 'k MemExt.ext + | I_LDUR_SIMD of simd_variant * reg * reg * 'k + | I_LDAPUR_SIMD of simd_variant * reg * reg * 'k + | I_STUR_SIMD of simd_variant * reg * reg * 'k + | I_STLUR_SIMD of simd_variant * reg * reg * 'k | I_ADDV of simd_variant * reg * reg | I_DUP of reg * variant * reg | I_FMOV_TG of variant * reg * simd_variant * reg @@ -1385,27 +1381,15 @@ let do_pp_instruction m = pp_vreg v r2 ^ "," ^ do_pp_idx true ra idx in - let pp_smem memo v rt ra kr = + let pp_smem memo v rt ra idx = pp_memo memo ^ " " ^ pp_vsimdreg v rt ^ - ",[" ^ pp_xreg ra ^ pp_kr false false kr ^ "]" in + "," ^ pp_addr ra idx in - let pp_smem_shift memo v rt ra kr s = - pp_memo memo ^ " " ^ pp_vsimdreg v rt ^ - ",[" ^ pp_xreg ra ^ pp_kr false false kr ^ - pp_barrel_shift "," s (m.pp_k) ^ "]" in - - let pp_smem_post memo v rt ra k = - pp_memo memo ^ " " ^ pp_vsimdreg v rt ^ - ",[" ^ pp_xreg ra ^ "]" ^ m.pp_k k in - - let pp_smemp_post memo v r1 r2 ra k = - pp_memo memo ^ " " ^ pp_vsimdreg v r1 ^ "," ^ pp_vsimdreg v r2 ^ - ",[" ^ pp_xreg ra ^ "]" ^ m.pp_k k in - - let pp_smemp memo v r1 r2 ra k = - pp_memo memo ^ " " ^ pp_vsimdreg v r1 ^ "," ^ pp_vsimdreg v r2 ^ - ",[" ^ pp_xreg ra ^ (if m.compat - then pp_kr false false (K k) else "," ^ m.pp_k k) ^ "]" in + let pp_smemp memo v r1 r2 ra idx = + pp_memo memo ^ " " ^ + pp_vsimdreg v r1 ^ "," ^ + pp_vsimdreg v r2 ^ "," ^ + pp_idx ra idx in let pp_vmem_shift memo r k s = pp_memo memo ^ " " ^ pp_simd_vector_reg r ^ "," ^ m.pp_k k ^ @@ -1565,42 +1549,22 @@ let do_pp_instruction m = pp_vmem_s "ST4" rs i r2 kr | I_ST4M (rs,r2,kr) -> pp_vmem_r_m "ST4" rs r2 kr - | I_LDP_P_SIMD (t,v,r1,r2,r3,k) -> - pp_smemp_post (match t with TT -> "LDP" | NT -> "LDNP") v r1 r2 r3 k - | I_STP_P_SIMD (t,v,r1,r2,r3,k) -> - pp_smemp_post (match t with TT -> "STP" | NT -> "STNP") v r1 r2 r3 k - | I_LDP_SIMD (t,v,r1,r2,r3,k) -> - pp_smemp (match t with TT -> "LDP" | NT -> "LDNP") v r1 r2 r3 k - | I_STP_SIMD (t,v,r1,r2,r3,k) -> - pp_smemp (match t with TT -> "STP" | NT -> "STNP") v r1 r2 r3 k - | I_LDR_SIMD (v,r1,r2,k,S_NOEXT) -> - pp_smem "LDR" v r1 r2 k - | I_LDR_SIMD (v,r1,r2,k,s) -> - pp_smem_shift "LDR" v r1 r2 k s - | I_LDR_P_SIMD (v,r1,r2,k) -> - pp_smem_post "LDR" v r1 r2 k - | I_STR_SIMD (v,r1,r2,k,S_NOEXT) -> - pp_smem "STR" v r1 r2 k - | I_STR_SIMD (v,r1,r2,k,s) -> - pp_smem_shift "STR" v r1 r2 k s - | I_STR_P_SIMD (v,r1,r2,k) -> - pp_smem_post "STR" v r1 r2 k - | I_LDUR_SIMD (v,r1,r2,None) -> - sprintf "LDUR %s,[%s]" (pp_vsimdreg v r1) (pp_reg r2) - | I_LDAPUR_SIMD (v,r1,r2,None) -> - sprintf "LDAPUR %s,[%s]" (pp_vsimdreg v r1) (pp_reg r2) - | I_LDAPUR_SIMD (v,r1,r2,Some(k)) -> - sprintf "LDAPUR %s,[%s,%s]" (pp_vsimdreg v r1) (pp_reg r2) (m.pp_k k) - | I_LDUR_SIMD (v,r1,r2,Some(k)) -> - sprintf "LDUR %s,[%s,%s]" (pp_vsimdreg v r1) (pp_reg r2) (m.pp_k k) - | I_STUR_SIMD (v,r1,r2,None) -> - sprintf "STUR %s,[%s]" (pp_vsimdreg v r1) (pp_reg r2) - | I_STUR_SIMD (v,r1,r2,Some(k)) -> - sprintf "STUR %s,[%s,%s]" (pp_vsimdreg v r1) (pp_reg r2) (m.pp_k k) - | I_STLUR_SIMD (v,r1,r2,None) -> - sprintf "STLUR %s,[%s]" (pp_vsimdreg v r1) (pp_reg r2) - | I_STLUR_SIMD (v,r1,r2,Some(k)) -> - sprintf "STLUR %s,[%s,%s]" (pp_vsimdreg v r1) (pp_reg r2) (m.pp_k k) + | I_LDP_SIMD (t,v,r1,r2,r3,idx) -> + pp_smemp (match t with TT -> "LDP" | NT -> "LDNP") v r1 r2 r3 idx + | I_STP_SIMD (t,v,r1,r2,r3,idx) -> + pp_smemp (match t with TT -> "STP" | NT -> "STNP") v r1 r2 r3 idx + | I_LDR_SIMD (v,r1,r2,idx) -> + pp_smem "LDR" v r1 r2 idx + | I_STR_SIMD (v,r1,r2,idx) -> + pp_smem "STR" v r1 r2 idx + | I_LDAPUR_SIMD (v,r1,r2,k) -> + sprintf "LDAPUR %s,[%s%s]" (pp_vsimdreg v r1) (pp_reg r2) (pp_kr false false (K k)) + | I_LDUR_SIMD (v,r1,r2,k) -> + sprintf "LDUR %s,[%s%s]" (pp_vsimdreg v r1) (pp_reg r2) (pp_kr false false (K k)) + | I_STUR_SIMD (v,r1,r2,k) -> + sprintf "STUR %s,[%s%s]" (pp_vsimdreg v r1) (pp_reg r2) (pp_kr false false (K k)) + | I_STLUR_SIMD (v,r1,r2,k) -> + sprintf "STLUR %s,[%s%s]" (pp_vsimdreg v r1) (pp_reg r2) (pp_kr false false (K k)) | I_ADDV (v,r1,r2) -> sprintf "ADDV %s,%s" (pp_vsimdreg v r1) (pp_simd_vector_reg r2) | I_DUP (r1,v,r2) -> @@ -1884,7 +1848,6 @@ let fold_regs (f_regs,f_sregs) = | I_REV (_,r1,r2) | I_LDUR (_, r1, r2, _) | I_CHKEQ (r1,r2) | I_CLRTAG (r1,r2) | I_GC (_,r1,r2) | I_LDCT (r1,r2) | I_STCT (r1,r2) - | I_LDR_P_SIMD (_,r1,r2,_) | I_STR_P_SIMD (_,r1,r2,_) | I_MOV_VE (r1,_,r2,_) | I_MOV_V (r1,r2) | I_MOV_TG (_,r1,r2,_) | I_MOV_FG (r1,_,_,r2) | I_MOV_S (_,r1,r2,_) | I_FMOV_TG (_,r1,_,r2) @@ -1896,14 +1859,14 @@ let fold_regs (f_regs,f_sregs) = | I_STZ2G (r1,r2,_) | I_STG (r1,r2,_) | I_ALIGND (r1,r2,_) | I_ALIGNU (r1,r2,_) -> fold_reg r1 (fold_reg r2 c) - | I_MRS (r,sr) | I_MSR (sr,r) -> fold_reg (SysReg sr) (fold_reg r c) - | I_LDR_SIMD (_,r1,r2,kr,_) | I_STR_SIMD(_,r1,r2,kr,_) - -> fold_reg r1 (fold_reg r2 (fold_kr kr c)) + | I_MRS (r,sr) | I_MSR (sr,r) + -> fold_reg (SysReg sr) (fold_reg r c) | I_OP3 (_,_,r1,r2,e) -> fold_reg r1 (fold_reg r2 (fold_op_ext e c)) | I_LDR (_,r1,r2,idx) | I_LDRSW (r1,r2,idx) | I_LDRS (_,r1,r2,idx) | I_STR (_,r1,r2,idx) | I_LDRBH (_,r1,r2,idx) | I_STRBH (_,r1,r2,idx) + | I_LDR_SIMD (_,r1,r2,idx) | I_STR_SIMD (_,r1,r2,idx) -> fold_reg r1 (fold_reg r2 (fold_idx idx c)) | I_LD1 (rs,_,r2,kr) | I_LD1M (rs,r2,kr) | I_LD1R (rs,r2,kr) | I_LDAP1 (rs,_,r2,kr) | I_LD2 (rs,_,r2,kr) | I_LD2M (rs,r2,kr) | I_LD2R (rs,r2,kr) @@ -1919,8 +1882,6 @@ let fold_regs (f_regs,f_sregs) = | I_BUILD (r1,r2,r3) | I_CPYTYPE (r1,r2,r3) | I_CPYVALUE (r1,r2,r3) | I_CSEAL (r1,r2,r3) | I_SEAL (r1,r2,r3) | I_UNSEAL (r1,r2,r3) | I_SC (_,r1,r2,r3) - | I_LDP_P_SIMD (_,_,r1,r2,r3,_) - | I_STP_P_SIMD (_,_,r1,r2,r3,_) | I_LDP_SIMD (_,_,r1,r2,r3,_) | I_STP_SIMD (_,_,r1,r2,r3,_) | I_EOR_SIMD (r1,r2,r3) | I_ADD_SIMD (r1,r2,r3) | I_ADD_SIMD_S (r1,r2,r3) @@ -2079,22 +2040,14 @@ let map_regs f_reg f_symb = I_ST4 (List.map map_reg rs,i,map_reg r2,map_kr kr) | I_ST4M (rs,r2,kr) -> I_ST4M (List.map map_reg rs,map_reg r2,map_kr kr) - | I_LDP_SIMD (t,v,r1,r2,r3,k) -> - I_LDP_SIMD (t,v,map_reg r1,map_reg r2,map_reg r3,k) - | I_STP_SIMD (t,v,r1,r2,r3,k) -> - I_STP_SIMD (t,v,map_reg r1,map_reg r2,map_reg r3,k) - | I_LDR_SIMD (v,r1,r2,kr,os) -> - I_LDR_SIMD (v,map_reg r1,map_reg r2,map_kr kr,os) - | I_LDR_P_SIMD (v,r1,r2,k) -> - I_LDR_P_SIMD (v,map_reg r1,map_reg r2,k) - | I_STR_SIMD (v,r1,r2,kr,os) -> - I_STR_SIMD (v,map_reg r1,map_reg r2,map_kr kr,os) - | I_STR_P_SIMD (v,r1,r2,k) -> - I_STR_P_SIMD (v,map_reg r1,map_reg r2,k) - | I_LDP_P_SIMD (t,v,r1,r2,r3,k) -> - I_LDP_P_SIMD (t,v,map_reg r1,map_reg r2,map_reg r3,k) - | I_STP_P_SIMD (t,v,r1,r2,r3,k) -> - I_STP_P_SIMD (t,v,map_reg r1,map_reg r2,map_reg r3,k) + | I_LDP_SIMD (t,v,r1,r2,r3,idx) -> + I_LDP_SIMD (t,v,map_reg r1,map_reg r2,map_reg r3,idx) + | I_STP_SIMD (t,v,r1,r2,r3,idx) -> + I_STP_SIMD (t,v,map_reg r1,map_reg r2,map_reg r3,idx) + | I_LDR_SIMD (v,r1,r2,idx) -> + I_LDR_SIMD (v,map_reg r1,map_reg r2,map_idx idx) + | I_STR_SIMD (v,r1,r2,idx) -> + I_STR_SIMD (v,map_reg r1,map_reg r2,map_idx idx) | I_LDUR_SIMD (v,r1,r2,k) -> I_LDUR_SIMD (v,map_reg r1, map_reg r2,k) | I_LDAPUR_SIMD (v,r1,r2,k) -> @@ -2331,10 +2284,9 @@ let get_next = | I_ST2 _ | I_ST2M _ | I_ST3 _ | I_ST3M _ | I_ST4 _ | I_ST4M _ - | I_LDP_P_SIMD _ | I_LDP_SIMD _ - | I_STP_P_SIMD _ | I_STP_SIMD _ - | I_LDR_SIMD _ | I_LDR_P_SIMD _ - | I_STR_SIMD _ | I_STR_P_SIMD _ + | I_LDP_SIMD _ + | I_STP_SIMD _ + | I_LDR_SIMD _ | I_STR_SIMD _ | I_LDUR_SIMD _ | I_LDAPUR_SIMD _ | I_STUR_SIMD _ | I_STLUR_SIMD _ | I_ADDV _ | I_DUP _ | I_FMOV_TG _ @@ -2629,22 +2581,14 @@ module PseudoI = struct | I_ST3M (rs,r2,kr) -> I_ST3M (rs,r2,kr_tr kr) | I_ST4 (rs,i,r2,kr) -> I_ST4 (rs,i,r2,kr_tr kr) | I_ST4M (rs,r2,kr) -> I_ST4M (rs,r2,kr_tr kr) - | I_LDP_P_SIMD (t,v,r1,r2,r3,k) -> I_LDP_P_SIMD (t,v,r1,r2,r3,k_tr k) - | I_STP_P_SIMD (t,v,r1,r2,r3,k) -> I_STP_P_SIMD (t,v,r1,r2,r3,k_tr k) - | I_LDP_SIMD (t,v,r1,r2,r3,k) -> I_LDP_SIMD (t,v,r1,r2,r3,k_tr k) - | I_STP_SIMD (t,v,r1,r2,r3,k) -> I_STP_SIMD (t,v,r1,r2,r3,k_tr k) - | I_LDR_SIMD (v,r1,r2,kr,s) -> I_LDR_SIMD (v,r1,r2,kr_tr kr,ap_shift k_tr s) - | I_LDR_P_SIMD (v,r1,r2,k) -> I_LDR_P_SIMD (v,r1,r2,k_tr k) - | I_STR_SIMD (v,r1,r2,kr,s) -> I_STR_SIMD (v,r1,r2,kr_tr kr,ap_shift k_tr s) - | I_STR_P_SIMD (v,r1,r2,k) -> I_STR_P_SIMD (v,r1,r2,k_tr k) - | I_LDUR_SIMD (v,r1,r2,None) -> I_LDUR_SIMD (v,r1,r2,None) - | I_LDUR_SIMD (v,r1,r2,Some(k)) -> I_LDUR_SIMD (v,r1,r2,Some(k_tr k)) - | I_LDAPUR_SIMD (v,r1,r2,None) -> I_LDAPUR_SIMD (v,r1,r2,None) - | I_LDAPUR_SIMD (v,r1,r2,Some(k)) -> I_LDAPUR_SIMD (v,r1,r2,Some(k_tr k)) - | I_STUR_SIMD (v,r1,r2,None) -> I_STUR_SIMD (v,r1,r2,None) - | I_STUR_SIMD (v,r1,r2,Some(k)) -> I_STUR_SIMD (v,r1,r2,Some(k_tr k)) - | I_STLUR_SIMD (v,r1,r2,None) -> I_STLUR_SIMD (v,r1,r2,None) - | I_STLUR_SIMD (v,r1,r2,Some(k)) -> I_STLUR_SIMD (v,r1,r2,Some(k_tr k)) + | I_LDP_SIMD (t,v,r1,r2,r3,idx) -> I_LDP_SIMD (t,v,r1,r2,r3,idx_tr idx) + | I_STP_SIMD (t,v,r1,r2,r3,idx) -> I_STP_SIMD (t,v,r1,r2,r3,idx_tr idx) + | I_LDR_SIMD (v,r1,r2,idx) -> I_LDR_SIMD (v,r1,r2,ext_tr idx) + | I_STR_SIMD (v,r1,r2,idx) -> I_STR_SIMD (v,r1,r2,ext_tr idx) + | I_LDUR_SIMD (v,r1,r2,k) -> I_LDUR_SIMD (v,r1,r2,k_tr k) + | I_LDAPUR_SIMD (v,r1,r2,k) -> I_LDAPUR_SIMD (v,r1,r2,k_tr k) + | I_STUR_SIMD (v,r1,r2,k) -> I_STUR_SIMD (v,r1,r2,k_tr k) + | I_STLUR_SIMD (v,r1,r2,k) -> I_STLUR_SIMD (v,r1,r2,k_tr k) | I_MOVI_V (r,k,s) -> I_MOVI_V (r,k_tr k,ap_shift k_tr s) | I_MOVI_S (v,r,k) -> I_MOVI_S (v,r,k_tr k) | I_EOR_SIMD (r1,r2,r3) -> I_EOR_SIMD (r1,r2,r3) @@ -2693,9 +2637,6 @@ module PseudoI = struct | I_LD2 _ | I_LD2R _ | I_ST2 _ -> 2 - (* reads, stores, then post-index stores *) - | I_LDP_P_SIMD _ | I_STP_P_SIMD _ - | I_LDR_P_SIMD _ | I_STR_P_SIMD _ | I_LD3 _ | I_LD3R _ | I_ST3 _ | I_CASP _ diff --git a/lib/AArch64Parser.mly b/lib/AArch64Parser.mly index da90692970..6f2e2f6ec3 100644 --- a/lib/AArch64Parser.mly +++ b/lib/AArch64Parser.mly @@ -274,26 +274,11 @@ kr: | xreg { RV (V64,$1) } | wreg { RV (V32,$1) } -kr_shift_address: -| k { K $1, S_NOEXT } -| xreg { (RV (V64,$1)), S_NOEXT } -| wreg COMMA shift { (RV (V32, $1)), $3 } -| xreg COMMA shift { (RV (V64, $1)), $3 } - -(* For address argument only *) -kr0: -| { K (MetaConst.zero), S_NOEXT } -| COMMA kr_shift_address { $2 } - kx0_no_shift: | { K (MetaConst.zero) } | COMMA k { K $2 } | COMMA xreg { RV (V64,$2) } -k0_no_shift: -| { K (MetaConst.zero) } -| COMMA k { K $2 } - %inline op_ext_shift: | TOK_LSL k { OpExt.LSL $2 } | TOK_LSR k { OpExt.LSR $2} @@ -431,22 +416,9 @@ ldp_instr: ldp_simd_instr: | LDP - { ( fun v r1 r2 r3 k0 k0' -> - match k0',k0 with - | Some post,None -> - I_LDP_P_SIMD (TT,v,r1,r2,r3,post) - | None,Some k -> I_LDP_SIMD (TT,v,r1,r2,r3,k) - | None,None -> I_LDP_SIMD (TT,v,r1,r2,r3,MetaConst.zero) - | _,_ -> assert false - )} + { ( fun v r1 r2 (r3,idx) -> I_LDP_SIMD (TT,v,r1,r2,r3,idx)) } | LDNP - { ( fun v r1 r2 r3 k0 k0' -> - match k0', k0 with - | None,Some k -> I_LDP_SIMD (NT,v,r1,r2,r3,k) - | None,None -> I_LDP_SIMD (NT,v,r1,r2,r3,MetaConst.zero) - | _,_-> assert false - - )} + { ( fun v r1 r2 (r3,idx) -> I_LDP_SIMD (NT,v,r1,r2,r3,idx)) } stp_instr: | STP @@ -467,19 +439,9 @@ stp_instr: stp_simd_instr: | STP - { ( fun v r1 r2 r3 k0 k0' -> - match k0',k0 with - | Some post, K k when k = MetaConst.zero -> - I_STP_P_SIMD (TT,v,r1,r2,r3,post) - | None, K k -> I_STP_SIMD (TT,v,r1,r2,r3,k) - | _,_ -> assert false - )} + { ( fun v r1 r2 (r3,idx) -> I_STP_SIMD (TT,v,r1,r2,r3,idx)) } | STNP - { ( fun v r1 r2 r3 k0 k0' -> - match k0',k0 with - | None,K k -> I_STP_SIMD (NT,v,r1,r2,r3,k) - | _,_ -> assert false - )} + { ( fun v r1 r2 (r3,idx) -> I_STP_SIMD (NT,v,r1,r2,r3,idx)) } cond: | TOK_EQ { EQ } @@ -706,44 +668,32 @@ instr: { I_ST4 ($2, $3, $6, $8) } | ST4 vregs4 COMMA LBRK xreg RBRK kx0_no_shift { I_ST4M ($2, $5, $7) } -| ldp_simd_instr sreg COMMA sreg COMMA LBRK xreg k0_opt RBRK k0_opt - { $1 VSIMD32 $2 $4 $7 $8 $10 } -| ldp_simd_instr dreg COMMA dreg COMMA LBRK xreg k0_opt RBRK k0_opt - { $1 VSIMD64 $2 $4 $7 $8 $10 } -| ldp_simd_instr qreg COMMA qreg COMMA LBRK xreg k0_opt RBRK k0_opt - { $1 VSIMD128 $2 $4 $7 $8 $10 } -| stp_simd_instr sreg COMMA sreg COMMA LBRK xreg k0_no_shift RBRK k0_opt - { $1 VSIMD32 $2 $4 $7 $8 $10 } -| stp_simd_instr dreg COMMA dreg COMMA LBRK xreg k0_no_shift RBRK k0_opt - { $1 VSIMD64 $2 $4 $7 $8 $10 } -| stp_simd_instr qreg COMMA qreg COMMA LBRK xreg k0_no_shift RBRK k0_opt - { $1 VSIMD128 $2 $4 $7 $8 $10 } -| LDR scalar_regs COMMA LBRK xreg kr0 RBRK k0_opt - { let v,r = $2 in - let kr, os = $6 in - match $8 with - | Some post when kr = K MetaConst.zero -> - I_LDR_P_SIMD (v,r,$5,post) - | _ -> - I_LDR_SIMD (v,r,$5,kr,os) } -| LDUR scalar_regs COMMA LBRK xreg k0_opt RBRK +| ldp_simd_instr sreg COMMA sreg COMMA mem_idx + { $1 VSIMD32 $2 $4 $6 } +| ldp_simd_instr dreg COMMA dreg COMMA mem_idx + { $1 VSIMD64 $2 $4 $6 } +| ldp_simd_instr qreg COMMA qreg COMMA mem_idx + { $1 VSIMD128 $2 $4 $6 } +| stp_simd_instr sreg COMMA sreg COMMA mem_idx + { $1 VSIMD32 $2 $4 $6 } +| stp_simd_instr dreg COMMA dreg COMMA mem_idx + { $1 VSIMD64 $2 $4 $6 } +| stp_simd_instr qreg COMMA qreg COMMA mem_idx + { $1 VSIMD128 $2 $4 $6 } +| LDR scalar_regs COMMA mem_ea + { let (v,r) = $2 and (ra,ext) = $4 in I_LDR_SIMD (v,r,ra,ext) } +| LDUR scalar_regs COMMA LBRK xreg k0 RBRK { let v,r = $2 in I_LDUR_SIMD (v, r, $5, $6) } -| LDAPUR scalar_regs COMMA LBRK xreg k0_opt RBRK +| LDAPUR scalar_regs COMMA LBRK xreg k0 RBRK { let v,r = $2 in I_LDAPUR_SIMD (v, r, $5, $6) } -| STR scalar_regs COMMA LBRK xreg kr0 RBRK k0_opt - { let v,r = $2 in - let kr, os = $6 in - match $8 with - | Some post when kr = K MetaConst.zero -> - I_STR_P_SIMD (v,r,$5,post) - | _ -> - I_STR_SIMD (v,r,$5,kr,os) } -| STUR scalar_regs COMMA LBRK xreg k0_opt RBRK +| STR scalar_regs COMMA mem_ea + { let (v,r) = $2 and (ra,ext) = $4 in I_STR_SIMD (v,r,ra,ext) } +| STUR scalar_regs COMMA LBRK xreg k0 RBRK { let v,r = $2 in I_STUR_SIMD (v, r, $5, $6) } -| STLUR scalar_regs COMMA LBRK xreg k0_opt RBRK +| STLUR scalar_regs COMMA LBRK xreg k0 RBRK { let v,r = $2 in I_STLUR_SIMD (v, r, $5, $6) } | ADDV breg COMMA vreg diff --git a/litmus/AArch64Compile_litmus.ml b/litmus/AArch64Compile_litmus.ml index 0a9602fe1c..14e8542536 100644 --- a/litmus/AArch64Compile_litmus.ml +++ b/litmus/AArch64Compile_litmus.ml @@ -548,51 +548,59 @@ module Make(V:Constant.S)(C:Config) = let print_simd_list rs io offset = String.concat "," (List.mapi (print_simd_reg io offset) rs) - let load_simd memo v r1 r2 k os = match k,os with - | K 0,S_NOEXT -> - { empty_ins with - memo = sprintf "%s %s,[^i0]" memo (print_vecreg v "o" 0); - inputs = [r2]; - outputs = [r1]; - reg_env = [(r1,int32x4_t);(r2,voidstar)]} - | K k,S_NOEXT -> - { empty_ins with - memo = sprintf "%s %s,[^i0,#%i]" memo (print_vecreg v "o" 0) k; - inputs = [r2]; - outputs = [r1]; - reg_env = [(r1,int32x4_t);(r2,voidstar)]} - | RV (V32,rk),S_NOEXT -> - { empty_ins with - memo = sprintf "%s %s,[^i0,^wi1]" memo (print_vecreg v "o" 0); - inputs = [r2;rk;]; - outputs = [r1]; - reg_env = [(r1,int32x4_t);(r2,voidstar);(rk,word)]} - | RV (V32,rk),s -> - { empty_ins with - memo = sprintf "%s %s,[^i0,^wi1,%s]" memo (print_vecreg v "o" 0) (pp_shifter s); - inputs = [r2;rk;]; - outputs = [r1]; - reg_env = [(r1,int32x4_t);(r2,voidstar);(rk,word)]} - | RV (V64,rk), S_NOEXT -> - { empty_ins with - memo = sprintf "%s %s,[^i0,^i1]" memo (print_vecreg v "o" 0); - inputs = [r2;rk;]; - outputs = [r1]; - reg_env = [(r1,int32x4_t);(r2,voidstar);(rk,quad)]} - | RV (V64,rk), s -> - { empty_ins with - memo = sprintf "%s %s,[^i0,^i1,%s]" memo (print_vecreg v "o" 0) (pp_shifter s); - inputs = [r2;rk;]; - outputs = [r1]; - reg_env = [(r1,int32x4_t);(r2,voidstar);(rk,quad)]} - | _ -> assert false + let load_simd memo v r1 r2 idx = + let open MemExt in + match idx with + | Imm(0,Idx) -> + { empty_ins with + memo = sprintf "%s %s,[^i0]" memo (print_vecreg v "o" 0); + inputs = [r2]; + outputs = [r1]; + reg_env = [(r1,int32x4_t);(r2,voidstar)]} + | Imm (k,Idx) -> + { empty_ins with + memo = sprintf "%s %s,[^i0,#%i]" memo (print_vecreg v "o" 0) k; + inputs = [r2]; + outputs = [r1]; + reg_env = [(r1,int32x4_t);(r2,voidstar)]} + | Imm (k,PostIdx) -> + { empty_ins with + memo = sprintf "%s %s,[^i0],#%i" memo (print_vecreg v "o" 0) k; + inputs = [r2]; + outputs = [r1]; + reg_env = [(r1,int32x4_t);(r2,voidstar)]} + | Imm (k,PreIdx) -> + { empty_ins with + memo = sprintf "%s %s,[^i0,#%i]!" memo (print_vecreg v "o" 0) k; + inputs = [r2]; + outputs = [r1]; + reg_env = [(r1,int32x4_t);(r2,voidstar)]} + | Reg (V32,rk,se,0) -> + { empty_ins with + memo = sprintf "%s %s,[^i0,^wi1,%s]" memo (print_vecreg v "o" 0) (pp_mem_sext se); + inputs = [r2;rk;]; + outputs = [r1]; + reg_env = [(r1,int32x4_t);(r2,voidstar);(rk,word)]} + | Reg (V32,rk,se,k) -> + { empty_ins with + memo = sprintf "%s %s,[^i0,^wi1,%s #%d]" memo (print_vecreg v "o" 0) (pp_mem_sext se) k; + inputs = [r2;rk;]; + outputs = [r1]; + reg_env = [(r1,int32x4_t);(r2,voidstar);(rk,word)]} + | Reg (V64,rk,se,0) -> + { empty_ins with + memo = sprintf "%s %s,[^i0,^i1,%s]" memo (print_vecreg v "o" 0) (pp_mem_sext se); + inputs = [r2;rk;]; + outputs = [r1]; + reg_env = [(r1,int32x4_t);(r2,voidstar);(rk,quad)]} + | Reg (V64,rk,se,k) -> + { empty_ins with + memo = sprintf "%s %s,[^i0,^i1,%s #%d]" memo (print_vecreg v "o" 0) (pp_mem_sext se) k; + inputs = [r2;rk;]; + outputs = [r1]; + reg_env = [(r1,int32x4_t);(r2,voidstar);(rk,quad)]} + | _ -> assert false - let load_simd_p v r1 r2 k = - { empty_ins with - memo = sprintf "ldr %s, [^i0],#%i" (print_vecreg v "o" 0) k; - inputs = [r2]; - outputs = [r1]; - reg_env = [(r1,int32x4_t);(r2,voidstar)]} let load_simd_s memo rs i rA kr = match kr with | K 0 -> @@ -636,111 +644,84 @@ module Make(V:Constant.S)(C:Config) = reg_env = (add_128 rs) @ [(rA,voidstar);(rB,quad)]} | _ -> Warn.fatal "Illegal form of %s instruction" memo - let load_pair_simd memo v r1 r2 r3 k = match v,k with - | VSIMD32,0 -> + let load_pair_simd memo v r1 r2 r3 idx = match idx with + | 0,Idx -> { empty_ins with - memo = sprintf "%s %s,%s,[^i0]" memo (print_vecreg VSIMD32 "o" 0) (print_vecreg VSIMD32 "o" 1); + memo = sprintf "%s %s,%s,[^i0]" memo (print_vecreg v "o" 0) (print_vecreg v "o" 1); inputs=[r3]; outputs=[r1;r2;]; reg_env= (add_128 [r1;r2;]) @ [(r3,voidstar)]} - | VSIMD32,k -> + | k,Idx -> { empty_ins with - memo = sprintf "%s %s,%s,[^i0,#%i]" memo (print_vecreg VSIMD32 "o" 0) (print_vecreg VSIMD32 "o" 1) k; + memo = sprintf "%s %s,%s,[^i0,#%i]" memo (print_vecreg v "o" 0) (print_vecreg v "o" 1) k; inputs=[r3]; outputs=[r1;r2;]; reg_env= (add_128 [r1;r2;]) @ [(r3,voidstar)]} - | VSIMD64,0 -> + | k,PostIdx -> { empty_ins with - memo = sprintf "%s %s,%s,[^i0]" memo (print_vecreg VSIMD64 "o" 0) (print_vecreg VSIMD64 "o" 1); + memo = sprintf "%s %s,%s,[^i0],#%i" memo (print_vecreg v "o" 0) (print_vecreg v "o" 1) k; inputs=[r3]; outputs=[r1;r2;]; reg_env= (add_128 [r1;r2;]) @ [(r3,voidstar)]} - | VSIMD64,k -> + | k,PreIdx -> { empty_ins with - memo = sprintf "%s %s,%s,[^i0,#%i]" memo (print_vecreg VSIMD64 "o" 0) (print_vecreg VSIMD64 "o" 1) k; + memo = sprintf "%s %s,%s,[^i0,#%i]!" memo (print_vecreg v "o" 0) (print_vecreg v "o" 1) k; inputs=[r3]; outputs=[r1;r2;]; reg_env= (add_128 [r1;r2;]) @ [(r3,voidstar)]} - | VSIMD128,0 -> + + let store_simd memo v r1 r2 idx = + let open MemExt in + match idx with + | Imm (0,Idx) -> { empty_ins with - memo = sprintf "%s %s,%s,[^i0]" memo (print_vecreg VSIMD128 "o" 0) (print_vecreg VSIMD128 "o" 1); - inputs=[r3]; - outputs=[r1;r2;]; - reg_env= (add_128 [r1;r2;]) @ [(r3,voidstar)]} - | VSIMD128,k -> + memo = sprintf "%s %s,[^i1]" memo (print_vecreg v "i" 0); + inputs = [r1;r2]; + outputs = []; + reg_env = [(r1,int32x4_t);(r2,voidstar)]} + | Imm (k,Idx) -> { empty_ins with - memo = sprintf "%s %s,%s,[^i0,#%i]" memo (print_vecreg VSIMD128 "o" 0) (print_vecreg VSIMD128 "o" 1) k; - inputs=[r3]; - outputs=[r1;r2;]; - reg_env= (add_128 [r1;r2;]) @ [(r3,voidstar)]} - | _, _ -> assert false - - let load_pair_p_simd memo v r1 r2 r3 k = match v with - | VSIMD32 -> + memo = sprintf "%s %s,[^i1,#%i]" memo (print_vecreg v "i" 0) k; + inputs = [r1;r2]; + outputs = []; + reg_env = [(r1,int32x4_t);(r2,voidstar)]} + | Imm (k,PostIdx) -> { empty_ins with - memo = sprintf "%s %s,%s,[^i0],#%i" memo (print_vecreg VSIMD32 "o" 0) (print_vecreg VSIMD32 "o" 1) k; - inputs=[r3]; - outputs=[r1;r2;]; - reg_env= (add_128 [r1;r2;]) @ [(r3,voidstar)]} - | VSIMD64 -> + memo = sprintf "%s %s,[^i1],#%i" memo (print_vecreg v "i" 0) k; + inputs = [r1;r2]; + outputs = []; + reg_env = [(r1,int32x4_t);(r2,voidstar)]} + | Imm (k,PreIdx) -> { empty_ins with - memo = sprintf "%s %s,%s,[^i0],#%i" memo (print_vecreg VSIMD64 "o" 0) (print_vecreg VSIMD64 "o" 1) k; - inputs=[r3]; - outputs=[r1;r2;]; - reg_env= (add_128 [r1;r2;]) @ [(r3,voidstar)]} - | VSIMD128 -> + memo = sprintf "%s %s,[^i1,#%i]!" memo (print_vecreg v "i" 0) k; + inputs = [r1;r2]; + outputs = []; + reg_env = [(r1,int32x4_t);(r2,voidstar)]} + | Reg (V32,rk,se,0) -> { empty_ins with - memo = sprintf "%s %s,%s,[^i0],#%i" memo (print_vecreg VSIMD128 "o" 0) (print_vecreg VSIMD128 "o" 1) k; - inputs=[r3]; - outputs=[r1;r2;]; - reg_env= (add_128 [r1;r2;]) @ [(r3,voidstar)]} - | _ -> assert false - - let store_simd memo v r1 r2 k os = match k,os with - | K 0,S_NOEXT -> - { empty_ins with - memo = sprintf "%s %s,[^i1]" memo (print_vecreg v "i" 0); - inputs = [r1;r2]; - outputs = []; - reg_env = [(r1,int32x4_t);(r2,voidstar)]} - | K k,S_NOEXT -> - { empty_ins with - memo = sprintf "%s %s,[^i1,#%i]" memo (print_vecreg v "i" 0) k; - inputs = [r1;r2]; - outputs = []; - reg_env = [(r1,int32x4_t);(r2,voidstar)]} - | RV (V32,rk),S_NOEXT -> - { empty_ins with - memo = sprintf "%s %s,[^i1,^wi2]" memo (print_vecreg v "i" 0); - inputs = [r1;r2;rk]; - outputs = []; - reg_env = [(r1,int32x4_t);(r2,voidstar);(rk,word)]} - | RV (V32,rk),s -> - { empty_ins with - memo = sprintf "%s %s,[^i1,^wi2,%s]" memo (print_vecreg v "i" 0) (pp_shifter s); - inputs = [r1;r2;rk]; - outputs = []; - reg_env = [(r1,int32x4_t);(r2,voidstar);(rk,word)]} - | RV (V64,rk),S_NOEXT -> - { empty_ins with - memo = sprintf "%s %s,[^i1,^i2]" memo (print_vecreg v "i" 0); - inputs = [r1;r2;rk]; - outputs = []; - reg_env = [(r1,int32x4_t);(r2,voidstar);(rk,quad)]} - | RV (V64,rk),s -> - { empty_ins with - memo = sprintf "%s %s,[^i1,^i2,%s]" memo (print_vecreg v "i" 0) (pp_shifter s); - inputs = [r1;r2;rk]; - outputs = []; - reg_env = [(r1,int32x4_t);(r2,voidstar);(rk,quad)]} - | _ -> assert false - - let store_simd_p v r1 r2 k1 = - { empty_ins with - memo = sprintf "str %s, [^i1],%i" (print_vecreg v "i" 0) k1; - inputs = [r1;r2]; - outputs = []; - reg_env = [(r1,int32x4_t);(r2,voidstar)]} + memo = sprintf "%s %s,[^i1,^wi2,%s]" memo (print_vecreg v "i" 0) (pp_mem_sext se); + inputs = [r1;r2;rk]; + outputs = []; + reg_env = [(r1,int32x4_t);(r2,voidstar);(rk,word)]} + | Reg (V32,rk,se,k) -> + { empty_ins with + memo = sprintf "%s %s,[^i1,^wi2,%s #%d]" memo (print_vecreg v "i" 0) (pp_mem_sext se) k; + inputs = [r1;r2;rk]; + outputs = []; + reg_env = [(r1,int32x4_t);(r2,voidstar);(rk,word)]} + | Reg (V64,rk,se,0) -> + { empty_ins with + memo = sprintf "%s %s,[^i1,^i2,%s]" memo (print_vecreg v "i" 0) (pp_mem_sext se); + inputs = [r1;r2;rk]; + outputs = []; + reg_env = [(r1,int32x4_t);(r2,voidstar);(rk,quad)]} + | Reg (V64,rk,se,k) -> + { empty_ins with + memo = sprintf "%s %s,[^i1,^i2,%s #%d]" memo (print_vecreg v "i" 0) (pp_mem_sext se) k; + inputs = [r1;r2;rk]; + outputs = []; + reg_env = [(r1,int32x4_t);(r2,voidstar);(rk,quad)]} + | _ -> assert false let store_simd_s memo rs i rA kr = match kr with | K 0 -> @@ -778,65 +759,31 @@ module Make(V:Constant.S)(C:Config) = reg_env = [(rA,voidstar);(rB,quad)] @ (add_128 rs)} | _ -> Warn.fatal "Illegal form of %s instruction" memo - let store_pair_simd memo v r1 r2 r3 k = match v,k with - | VSIMD32,0 -> - { empty_ins with - memo = sprintf "%s %s,%s,[^i2]" memo (print_vecreg VSIMD32 "i" 0) (print_vecreg VSIMD32 "i" 1); - inputs = [r1;r2;r3]; - outputs = []; - reg_env = (add_128 [r1;r2;]) @ [(r3,voidstar)]} - | VSIMD32,k -> - { empty_ins with - memo = sprintf "%s %s,%s,[^i2,#%i]" memo (print_vecreg VSIMD32 "i" 0) (print_vecreg VSIMD32 "i" 1) k; - inputs = [r1;r2;r3]; - outputs = []; - reg_env = (add_128 [r1;r2;]) @ [(r3,voidstar)]} - | VSIMD64,0 -> - { empty_ins with - memo = sprintf "%s %s,%s,[^i2]" memo (print_vecreg VSIMD64 "i" 0) (print_vecreg VSIMD64 "i" 1); - inputs = [r1;r2;r3]; - outputs = []; - reg_env = (add_128 [r1;r2;]) @ [(r3,voidstar)]} - | VSIMD64,k -> - { empty_ins with - memo = sprintf "%s %s,%s,[^i2,#%i]" memo (print_vecreg VSIMD64 "i" 0) (print_vecreg VSIMD64 "i" 1) k; - inputs = [r1;r2;r3]; - outputs = []; - reg_env = (add_128 [r1;r2;]) @ [(r3,voidstar)]} - | VSIMD128,0 -> - { empty_ins with - memo = sprintf "%s %s,%s,[^i2]" memo (print_vecreg VSIMD128 "i" 0) (print_vecreg VSIMD128 "i" 1); - inputs = [r1;r2;r3]; - outputs = []; - reg_env = (add_128 [r1;r2;]) @ [(r3,voidstar)]} - | VSIMD128,k -> + let store_pair_simd memo v r1 r2 r3 idx = match idx with + | 0,Idx -> { empty_ins with - memo = sprintf "%s %s,%s,[^i2,#%i]" memo (print_vecreg VSIMD128 "i" 0) (print_vecreg VSIMD128 "i" 1) k; + memo = sprintf "%s %s,%s,[^i2]" memo (print_vecreg v "i" 0) (print_vecreg v "i" 1); inputs = [r1;r2;r3]; outputs = []; reg_env = (add_128 [r1;r2;]) @ [(r3,voidstar)]} - | _,_ -> assert false - - let store_pair_p_simd memo v r1 r2 r3 k = match v with - | VSIMD32 -> + | k,Idx -> { empty_ins with - memo = sprintf "%s %s,%s,[^i2],#%i" memo (print_vecreg VSIMD32 "i" 0) (print_vecreg VSIMD32 "i" 1) k; + memo = sprintf "%s %s,%s,[^i2,#%i]" memo (print_vecreg v "i" 0) (print_vecreg v "i" 1) k; inputs = [r1;r2;r3]; outputs = []; reg_env = (add_128 [r1;r2;]) @ [(r3,voidstar)]} - | VSIMD64 -> + | k,PostIdx -> { empty_ins with - memo = sprintf "%s %s,%s,[^i2],#%i" memo (print_vecreg VSIMD64 "i" 0) (print_vecreg VSIMD64 "i" 1) k; + memo = sprintf "%s %s,%s,[^i2],#%i" memo (print_vecreg v "i" 0) (print_vecreg v "i" 1) k; inputs = [r1;r2;r3]; outputs = []; reg_env = (add_128 [r1;r2;]) @ [(r3,voidstar)]} - | VSIMD128 -> + | k,PreIdx -> { empty_ins with - memo = sprintf "%s %s,%s,[^i2],#%i" memo (print_vecreg VSIMD128 "i" 0) (print_vecreg VSIMD128 "i" 1) k; + memo = sprintf "%s %s,%s,[^i2,#%i]!" memo (print_vecreg v "i" 0) (print_vecreg v "i" 1) k; inputs = [r1;r2;r3]; outputs = []; reg_env = (add_128 [r1;r2;]) @ [(r3,voidstar)]} - | _ -> assert false let addv_simd v r1 r2 = { empty_ins with @@ -1363,26 +1310,16 @@ module Make(V:Constant.S)(C:Config) = | I_ST3M (rs,r2,kr) -> store_simd_m "st3" rs r2 kr::k | I_ST4 (rs,i,r2,kr) -> store_simd_s "st4" rs i r2 kr::k | I_ST4M (rs,r2,kr) -> store_simd_m "st4" rs r2 kr::k - | I_LDP_SIMD (t,v,r1,r2,r3,kr) -> - load_pair_simd (match t with TT -> "ldp" | NT -> "ldnp") v r1 r2 r3 kr::k - | I_STP_SIMD (t,v,r1,r2,r3,kr) -> - store_pair_simd (match t with TT -> "stp" | NT -> "stnp") v r1 r2 r3 kr::k - | I_LDP_P_SIMD (t,v,r1,r2,r3,k1) -> - load_pair_p_simd (match t with TT -> "ldp" | NT -> "ldnp") v r1 r2 r3 k1::k - | I_STP_P_SIMD (t,v,r1,r2,r3,k1) -> - store_pair_p_simd (match t with TT -> "stp" | NT -> "stnp") v r1 r2 r3 k1::k - | I_LDR_SIMD (v,r1,r2,k1,s) -> load_simd "ldr" v r1 r2 k1 s::k - | I_LDR_P_SIMD (v,r1,r2,k1) -> load_simd_p v r1 r2 k1::k - | I_STR_SIMD (v,r1,r2,k1,s) -> store_simd "str" v r1 r2 k1 s::k - | I_STR_P_SIMD (v,r1,r2,k1) -> store_simd_p v r1 r2 k1::k - | I_LDUR_SIMD (v,r1,r2,Some(k1)) -> load_simd "ldur" v r1 r2 (K k1) S_NOEXT::k - | I_LDUR_SIMD (v,r1,r2,None) -> load_simd "ldur" v r1 r2 (K 0) S_NOEXT::k - | I_LDAPUR_SIMD (v,r1,r2,Some(k1)) -> load_simd "ldapur" v r1 r2 (K k1) S_NOEXT::k - | I_LDAPUR_SIMD (v,r1,r2,None) -> load_simd "ldapur" v r1 r2 (K 0) S_NOEXT::k - | I_STUR_SIMD (v,r1,r2,Some(k1)) -> store_simd "stur" v r1 r2 (K k1) S_NOEXT::k - | I_STUR_SIMD (v,r1,r2,None) -> store_simd "stur" v r1 r2 (K 0) S_NOEXT::k - | I_STLUR_SIMD (v,r1,r2,Some(k1)) -> store_simd "stlur" v r1 r2 (K k1) S_NOEXT::k - | I_STLUR_SIMD (v,r1,r2,None) -> store_simd "stlur" v r1 r2 (K 0) S_NOEXT::k + | I_LDP_SIMD (t,v,r1,r2,r3,idx) -> + load_pair_simd (match t with TT -> "ldp" | NT -> "ldnp") v r1 r2 r3 idx::k + | I_STP_SIMD (t,v,r1,r2,r3,idx) -> + store_pair_simd (match t with TT -> "stp" | NT -> "stnp") v r1 r2 r3 idx::k + | I_LDR_SIMD (v,r1,r2,idx) -> load_simd "ldr" v r1 r2 idx::k + | I_STR_SIMD (v,r1,r2,idx) -> store_simd "str" v r1 r2 idx::k + | I_LDUR_SIMD (v,r1,r2,k1) -> load_simd "ldur" v r1 r2 (MemExt.k2idx k1)::k + | I_LDAPUR_SIMD (v,r1,r2,k1) -> load_simd "ldapur" v r1 r2 (MemExt.k2idx k1)::k + | I_STUR_SIMD (v,r1,r2,k1) -> store_simd "stur" v r1 r2 (MemExt.k2idx k1)::k + | I_STLUR_SIMD (v,r1,r2,k1) -> store_simd "stlur" v r1 r2 (MemExt.k2idx k1)::k | I_ADDV (v,r1,r2) -> addv_simd v r1 r2::k | I_DUP (r1,v,r2) -> dup_simd_v r1 v r2::k | I_FMOV_TG (v1,r1,v2,r2) -> fmov_simd_tg v1 r1 v2 r2::k