diff --git a/Makefile b/Makefile index d12061406..f764835fe 100644 --- a/Makefile +++ b/Makefile @@ -140,6 +140,18 @@ test.neon:: $(REGRESSION_TEST_MODE) @ echo "herd7 AArch64 NEON instructions tests: OK" +test:: test.sve +test.sve:: + @ echo + $(HERD_REGRESSION_TEST) \ + -j $(J) \ + -herd-path $(HERD) \ + -libdir-path ./herd/libdir \ + -litmus-dir ./herd/tests/instructions/AArch64.sve \ + -variant sve \ + $(REGRESSION_TEST_MODE) + @ echo "herd7 AArch64 SVE instructions tests: OK" + test:: test.mte test.mte:: @ echo diff --git a/gen/AArch64Arch_gen.ml b/gen/AArch64Arch_gen.ml index 0f5fab7e5..c1dc9f612 100644 --- a/gen/AArch64Arch_gen.ml +++ b/gen/AArch64Arch_gen.ml @@ -35,6 +35,7 @@ let do_morello = C.variant Variant_gen.Morello let do_fullkvm = C.variant Variant_gen.FullKVM let do_kvm = do_fullkvm || C.variant Variant_gen.KVM let do_neon = C.variant Variant_gen.Neon +let do_sve = C.variant Variant_gen.SVE let do_mixed = Variant_gen.is_mixed C.variant let do_cu = C.variant Variant_gen.ConstrainedUnpredictable @@ -60,7 +61,7 @@ module Mixed = let bellatom = false module SIMD = struct - type atom = NeP|NeAcqPc|NeRel|Ne1|Ne2|Ne3|Ne4|Ne2i|Ne3i|Ne4i|NePa|NePaN + type atom = SvV|Sv1|Sv2i|Sv3i|Sv4i|NeP|NeAcqPc|NeRel|Ne1|Ne2|Ne3|Ne4|Ne2i|Ne3i|Ne4i|NePa|NePaN let fold_neon f r = r |> f NeAcqPc |> f NeRel |> @@ -69,19 +70,25 @@ module SIMD = struct f Ne1 |> f Ne2 |> f Ne3 |> f Ne4 |> f Ne2i |> f Ne3i |> f Ne4i + let fold_sve f r = r |> + f SvV |> f Sv1 |> + f Sv2i |> f Sv3i |> f Sv4i + let nregs = function - | Ne1 -> 1 - | Ne2 | Ne2i -> 2 - | Ne3 | Ne3i -> 3 - | Ne4 | Ne4i -> 4 + | SvV | Sv1 | Ne1 -> 1 + | Sv2i | Ne2 | Ne2i -> 2 + | Sv3i | Ne3 | Ne3i -> 3 + | Sv4i | Ne4 | Ne4i -> 4 | _ -> 1 let nelements = function + | SvV|Sv1|Sv2i|Sv3i|Sv4i | Ne1|Ne2|Ne2i|Ne3|Ne3i|Ne4|Ne4i -> 4 | NePa|NePaN -> 2 | NeP | NeAcqPc | NeRel -> 1 let pp_opt = function + | Sv2i | Sv3i | Sv4i | Ne2i | Ne3i | Ne4i -> "i" | _ -> "" @@ -89,6 +96,9 @@ module SIMD = struct match n with | Ne1 | Ne2 | Ne3 | Ne4 | Ne2i | Ne3i | Ne4i -> Printf.sprintf "Ne%i%s" (nregs n) (pp_opt n) + | Sv1 | Sv2i | Sv3i | Sv4i -> + Printf.sprintf "Sv%i%s" (nregs n) (pp_opt n) + | SvV -> "SvV" | NePa -> "NePa" | NePaN -> "NePaN" | NeP -> "NeP" @@ -107,9 +117,10 @@ module SIMD = struct for k = 0 to sz-1 do for i=0 to el-1 do let j = match n with - | Ne2i | Ne3i | Ne4i -> k+i*sz + | Sv2i | Sv3i | Sv4i | Ne2i | Ne3i | Ne4i -> k+i*sz | NeP | NeAcqPc | NeRel | NePa | NePaN - | Ne1 | Ne2 | Ne3 | Ne4 -> i+k*el + | Ne1 | Ne2 | Ne3 | Ne4 + | SvV | Sv1 -> i+k*el in v.(j) <- start+k done @@ -120,9 +131,10 @@ module SIMD = struct let el = nelements n in let sz = nregs n in let access r k = match n with - | Ne2i | Ne3i | Ne4i -> sz*k + r + | Sv2i | Sv3i | Sv4i | Ne2i | Ne3i | Ne4i -> sz*k + r | NeP | NeAcqPc | NeRel | NePa | NePaN - | Ne1 | Ne2 | Ne3 | Ne4 -> el*r + k + | Ne1 | Ne2 | Ne3 | Ne4 + | SvV | Sv1 -> el*r + k in let rec reg r k = if k >= el then [] @@ -335,6 +347,12 @@ let is_ifetch a = match a with else fun _ r -> r + let fold_sve = + if do_sve then + fun f -> SIMD.fold_sve (fun n -> f (Neon n)) + else + fun _ r -> r + let fold_pair f r = if do_mixed then r else @@ -357,6 +375,7 @@ let is_ifetch a = match a with let r = fold_morello f r in let r = fold_tag f r in let r = fold_neon f r in + let r = fold_sve f r in let r = fold_pair f r in let r = fold_acc_opt None f r in let r = @@ -456,10 +475,10 @@ let is_ifetch a = match a with function | NeP | NeAcqPc | NeRel -> 1 | NePa | NePaN -> 2 - | Ne1 -> 4 - | Ne2 | Ne2i -> 8 - | Ne3 | Ne3i -> 12 - | Ne4 | 
Ne4i -> 16 + | SvV | Sv1 | Ne1 -> 4 + | Sv2i | Ne2 | Ne2i -> 8 + | Sv3i | Ne3 | Ne3i -> 12 + | Sv4i | Ne4 | Ne4i -> 16 let atom_to_bank = function | Tag,None -> Code.Tag @@ -866,6 +885,7 @@ include type special = reg let specials = vregs + let specials2 = pregs end) end diff --git a/gen/AArch64Compile_gen.ml b/gen/AArch64Compile_gen.ml index 87fb37669..785cf29ce 100644 --- a/gen/AArch64Compile_gen.ml +++ b/gen/AArch64Compile_gen.ml @@ -60,11 +60,37 @@ module Make(Cfg:Config) : XXXCompile_gen.S = | Vreg (r,(_,64)) -> A64.SIMDreg r | _ -> assert false (* ? *) + let to_scalable_vec r = match r with + | Vreg (r,(_,s)) -> A64.Zreg (r,s) + | _ -> assert false (* ? *) + let next_vreg x = A64.alloc_special x let next_scalar_reg x = let r,x = next_vreg x in to_scalar r,x + let next_zreg x = + let r,x = next_vreg x in + to_scalable_vec r,x + + let next_preg x = A64.alloc_special2 x + + let with_mode m r = match r with + | Preg (r,_) -> PMreg(r,m) + | PMreg (r,_) -> PMreg(r,m) + | _ -> assert false + + let pattern = function + | 1 -> VL1 + | 2 -> VL2 + | 3 -> VL3 + | 4 -> VL4 + | 5 -> VL5 + | 6 -> VL6 + | 7 -> VL7 + | 8 -> VL8 + | _ -> assert false + let pseudo = List.map (fun i -> Instruction i) let tempo1 st = A.alloc_trashed_reg "T1" st (* May be used for address *) @@ -85,6 +111,25 @@ module Make(Cfg:Config) : XXXCompile_gen.S = | Ne2 | Ne2i -> call_rec Ne1 st | Ne3 | Ne3i -> call_rec Ne2 st | Ne4 | Ne4i -> call_rec Ne3 st + | _ -> assert false + in + fun n st -> + let (r,rs),st = get_reg_list n st in + (r,rs),A.set_friends r rs st + + let emit_zregs = + let rec call_rec n st = + let r1,st = next_zreg st in + let (r2,rs),st = get_reg_list n st in + (r1,(r2::rs)),st + and get_reg_list n st = + let open SIMD in + match n with + | Sv1 -> let r,st = next_zreg st in (r,[]),st + | Sv2i -> call_rec Sv1 st + | Sv3i -> call_rec Sv2i st + | Sv4i -> call_rec Sv3i st + | _ -> assert false in fun n st -> let (r,rs),st = get_reg_list n st in @@ -238,6 +283,15 @@ module Make(Cfg:Config) : XXXCompile_gen.S = | Ne4i -> I_LD4M (rs,rt,K 0) | _ -> assert false + let ldnsv n rs pg rn idx= + let open SIMD in + match n with + | Sv1 -> I_LD1SP (VSIMD32,rs,with_mode Zero pg,rn,idx) + | Sv2i -> I_LD2SP (VSIMD32,rs,with_mode Zero pg,rn,idx) + | Sv3i -> I_LD3SP (VSIMD32,rs,with_mode Zero pg,rn,idx) + | Sv4i -> I_LD4SP (VSIMD32,rs,with_mode Zero pg,rn,idx) + | _ -> assert false + let ldr_mixed_idx v r1 r2 idx sz = let idx = MemExt.v2idx_reg v idx in let open MachSize in @@ -278,6 +332,15 @@ module Make(Cfg:Config) : XXXCompile_gen.S = | Ne4i -> I_ST4M (rs,rt,K 0) | _ -> assert false + let stnsv n rs pg rn idx = + let open SIMD in + match n with + | Sv1 -> I_ST1SP (VSIMD32,rs,pg,rn,idx) + | Sv2i -> I_ST2SP (VSIMD32,rs,pg,rn,idx) + | Sv3i -> I_ST3SP (VSIMD32,rs,pg,rn,idx) + | Sv4i -> I_ST4SP (VSIMD32,rs,pg,rn,idx) + | _ -> assert false + let stxr_sz t sz r1 r2 r3 = let open MachSize in match sz with @@ -638,6 +701,73 @@ module Make(Cfg:Config) : XXXCompile_gen.S = r,init,pseudo csA@cs,st end + module LDNW = struct + + let emit_load_reg n st init rA idx = + let pred,st = next_preg st in + let acc,st = next_vreg st in + let nelem = SIMD.nelements n in + let ptrue = [I_PTRUE (pred,pattern nelem)] in + let (r,rs),st = emit_zregs n st in + let load = [ldnsv n (r::rs) pred rA idx] in + let reduce = (List.map (fun v -> I_ADD_SV (r,r,v)) rs)@[I_UADDV (VSIMD64,to_scalar acc,pred,r)] in + let rX,st = next_reg st in + let fmov = [I_FMOV_TG(V32,rX,VSIMD32,to_scalar acc)] in + rX,init,lift_code (ptrue@load@reduce@fmov),st + + let emit_load 
n st p init loc = + let open MemExt in + let idx = Imm(0,Idx) in + let rA,init,st = U.next_init st p init loc in + emit_load_reg n st init rA idx + + let emit_load_idx n v st p init loc ridx = + let open MemExt in + let rA,init,st = U.next_init st p init loc in + let rI,csI,st = match v with + | V32 -> + let r,st = next_reg st in + r,[sxtw r ridx],st + | _ -> ridx,[],st + in + let idx = Reg(V64,rI,LSL,2) in + let r,init,cs,st = emit_load_reg n st init rA idx in + r,init,pseudo csI@cs,st + end + + module LD1G = struct + + let emit_load_reg n st init rA idx = + let pred,st = next_preg st in + let acc,st = next_vreg st in + let nelem = SIMD.nelements n in + let ptrue = [I_PTRUE (pred,pattern nelem)] in + let r,st = next_zreg st in + let load = [I_LD1SP (VSIMD32,[r],with_mode Zero pred,rA,idx)] in + let reduce = [I_UADDV (VSIMD64,to_scalar acc,pred,r)] in + let rX,st = next_reg st in + let fmov = [I_FMOV_TG(V32,rX,VSIMD32,to_scalar acc)] in + rX,init,lift_code (ptrue@load@reduce@fmov),st + + let emit_load n st p init loc = + let open MemExt in + let rA,init,st = U.next_init st p init loc in + let rI,st = next_zreg st in + let csI = [I_INDEX_II (rI,0,1)] in + let idx = ZReg(rI,UXTW,2) in + let r,init,cs,st = emit_load_reg n st init rA idx in + r,init,pseudo csI@cs,st + + let emit_load_idx n v st p init loc ridx = + let open MemExt in + let rA,init,st = U.next_init st p init loc in + let rI,st = next_zreg st in + let csI = [I_INDEX_SI (rI,v,ridx,1)] in + let idx = ZReg(rI,UXTW,2) in + let r,init,cs,st = emit_load_reg n st init rA idx in + r,init,pseudo csI@cs,st + end + module LDG = struct let emit_load st p init x = let rA,st = next_reg st in @@ -912,6 +1042,98 @@ module Make(Cfg:Config) : XXXCompile_gen.S = init,lift_code(dup@movi@adds@stlur),st end + module STNW = struct + let emit_store_reg n st init rA v idx = + let pred,st = next_preg st in + let nelem = SIMD.nelements n in + let ptrue = [I_PTRUE (pred,pattern nelem)] in + let (r,rs),st = emit_zregs n st in + let mov_sv = List.mapi (fun i r -> I_MOV_SV (r,(v+i),S_NOEXT)) (r::rs) in + let setup = mov_sv@ptrue in + let store = [stnsv n (r::rs) pred rA idx] in + init,lift_code (setup@store),st + + let emit_store n st p init loc v = + let open MemExt in + let idx = Imm(0,Idx) in + let rA,init,st = U.next_init st p init loc in + emit_store_reg n st init rA v idx + + let emit_store_idx n vdep st p init loc ridx v = + let open MemExt in + let rA,init,st = U.next_init st p init loc in + let rI,csI,st = match vdep with + | V32 -> + let r,st = next_reg st in + r,[sxtw r ridx],st + | _ -> ridx,[],st + in + let idx = Reg(V64,rI,LSL,2) in + let init,cs,st = emit_store_reg n st init rA v idx in + init,pseudo csI@cs,st + + let emit_store_dep n vdep st init rA v = + let open MemExt in + let idx = Imm(0,Idx) in + let rB,st = next_zreg st in + let dup_sv = [I_DUP_SV (rB,V32,vdep)] in + let (r,rs),st = emit_zregs n st in + let mov_sv = List.mapi (fun i r -> I_MOV_SV (r,(v+i),S_NOEXT)) (r::rs) in + let add_sv = List.map (fun v -> I_ADD_SV(v,v,rB)) (r::rs) in + let pred,st = next_preg st in + let nelem = SIMD.nelements n in + let ptrue = [I_PTRUE (pred,pattern nelem)] in + let setup = dup_sv@mov_sv@add_sv@ptrue in + let store = [stnsv n (r::rs) pred rA idx] in + init,lift_code (setup@store),st + end + + module ST1S = struct + let emit_store_reg n st init rA v idx= + let pred,st = next_preg st in + let nelem = SIMD.nelements n in + let ptrue = [I_PTRUE (pred,pattern nelem)] in + let r,st = next_zreg st in + let mov_sv = [I_MOV_SV (r,v,S_NOEXT)] in + let setup = 
mov_sv@ptrue in + let store = [I_ST1SP (VSIMD32,[r],pred,rA,idx)] in + init,lift_code (setup@store),st + + let emit_store n st p init loc v = + let open MemExt in + let rA,init,st = U.next_init st p init loc in + let rI,st = next_zreg st in + let csI = [I_INDEX_II (rI,0,1)] in + let idx = ZReg(rI,UXTW,2) in + let init,cs,st = emit_store_reg n st init rA v idx in + init,pseudo csI@cs,st + + let emit_store_idx n vdep st p init loc ridx v = + let open MemExt in + let rA,init,st = U.next_init st p init loc in + let rI,st = next_zreg st in + let csI = [I_INDEX_SI (rI,vdep,ridx,1)] in + let idx = ZReg(rI,UXTW,2) in + let init,cs,st = emit_store_reg n st init rA v idx in + init,pseudo csI@cs,st + + let emit_store_dep n vdep st init rA v = + let open MemExt in + let rB,st = next_zreg st in + let dup_sv = [I_DUP_SV (rB,V32,vdep)] in + let r,st = next_zreg st in + let mov_sv = [I_MOV_SV (r,v,S_NOEXT)] in + let add_sv = [I_ADD_SV (r,r,rB)] in + let pred,st = next_preg st in + let rI,st = next_zreg st in + let nelem = SIMD.nelements n in + let ptrue = [I_PTRUE (pred,pattern nelem)] in + let index = [I_INDEX_II (rI,0,1)] in + let setup = dup_sv@mov_sv@add_sv@ptrue in + let store = [I_ST1SP (VSIMD32,[r],pred,rA,ZReg(rI,UXTW,2))] in + init,lift_code (index@setup@store),st + end + module STG = struct let emit_store_reg st p init x rA = @@ -1353,6 +1575,8 @@ module Make(Cfg:Config) : XXXCompile_gen.S = | SIMD.NeP -> LDUR.emit_load | SIMD.NePa -> LDP.emit_load A64.TT | SIMD.NePaN -> LDP.emit_load A64.NT + | SIMD.Sv1 | SIMD.Sv2i | SIMD.Sv3i | SIMD.Sv4i-> LDNW.emit_load n + | SIMD.SvV -> LD1G.emit_load n | _ -> LDN.emit_load n in emit_load @@ -1473,6 +1697,8 @@ module Make(Cfg:Config) : XXXCompile_gen.S = | SIMD.NeP -> LDUR.emit_load | SIMD.NePa -> LDP.emit_load A64.TT | SIMD.NePaN -> LDP.emit_load A64.NT + | SIMD.Sv1 | SIMD.Sv2i | SIMD.Sv3i | SIMD.Sv4i -> LDNW.emit_load n + | SIMD.SvV -> LD1G.emit_load n | _ -> LDN.emit_load n in let r,init,cs,st = emit_load st p init loc in @@ -1566,6 +1792,8 @@ module Make(Cfg:Config) : XXXCompile_gen.S = | SIMD.NeP -> STUR.emit_store | SIMD.NePa -> STP.emit_store A64.TT | SIMD.NePaN -> STP.emit_store A64.NT + | SIMD.Sv1 | SIMD.Sv2i | SIMD.Sv3i | SIMD.Sv4i-> STNW.emit_store n + | SIMD.SvV -> ST1S.emit_store n | _ -> STN.emit_store n in let init,cs,st = emit_store st p init loc e.C.v in @@ -1925,6 +2153,8 @@ module Make(Cfg:Config) : XXXCompile_gen.S = | SIMD.NeP -> LDUR.emit_load_idx | SIMD.NePa -> LDP.emit_load_idx A64.TT | SIMD.NePaN -> LDP.emit_load_idx A64.NT + | SIMD.Sv1 | SIMD.Sv2i | SIMD.Sv3i | SIMD.Sv4i -> LDNW.emit_load_idx n + | SIMD.SvV -> LD1G.emit_load_idx n | _ -> LDN.emit_load_idx n in let rB,init,cs,st = emit_load_idx vdep st p init loc r2 in @@ -2064,6 +2294,8 @@ module Make(Cfg:Config) : XXXCompile_gen.S = | SIMD.NeP -> STUR.emit_store_idx | SIMD.NePa -> STP.emit_store_idx A64.TT | SIMD.NePaN -> STP.emit_store_idx A64.NT + | SIMD.Sv1 | SIMD.Sv2i | SIMD.Sv3i | SIMD.Sv4i -> STNW.emit_store_idx n + | SIMD.SvV -> ST1S.emit_store_idx n | _ -> STN.emit_store_idx n in let init,cs,st = emit_store_idx vdep st p init loc r2 e.C.v in @@ -2266,6 +2498,8 @@ module Make(Cfg:Config) : XXXCompile_gen.S = | SIMD.NeP -> STUR.emit_store_dep | SIMD.NePa -> STP.emit_store_dep A64.TT | SIMD.NePaN -> STP.emit_store_dep A64.NT + | SIMD.Sv1 | SIMD.Sv2i | SIMD.Sv3i | SIMD.Sv4i -> STNW.emit_store_dep n + | SIMD.SvV -> ST1S.emit_store_dep n | _ -> STN.emit_store_dep n in let init,cs,st = emit_store_dep r2 st init rA e.C.v in diff --git a/gen/X86_64Arch_gen.ml b/gen/X86_64Arch_gen.ml 
index 14cc43595..48cb9085b 100644 --- a/gen/X86_64Arch_gen.ml +++ b/gen/X86_64Arch_gen.ml @@ -255,5 +255,6 @@ module Make let free_registers = allowed_for_symb type special = xmm let specials = xmms + let specials2 = [] end) end diff --git a/gen/archExtra_gen.ml b/gen/archExtra_gen.ml index fa5795365..20c507861 100644 --- a/gen/archExtra_gen.ml +++ b/gen/archExtra_gen.ml @@ -25,6 +25,7 @@ module type I = sig type special val specials : special list + val specials2 : special list val pp_i : int -> string end @@ -74,6 +75,7 @@ module type S = sig type special val alloc_special : st -> special * st + val alloc_special2 : st -> special * st val set_friends : arch_reg -> arch_reg list -> st -> st val get_friends : st -> arch_reg -> arch_reg list @@ -219,6 +221,7 @@ with type arch_reg = I.arch_reg and type special = I.special { regs : arch_reg list ; map : arch_reg StringMap.t ; specials : I.special list ; + specials2 : I.special list ; noks : int ; env : TypBase.t LocMap.t ; (* Record types *) (* Group special registers together *) @@ -235,6 +238,7 @@ with type arch_reg = I.arch_reg and type special = I.special { regs = I.free_registers; map = StringMap.empty; specials = I.specials; + specials2 = I.specials2; noks = 0; env = LocMap.empty; friends = RegMap.empty; @@ -267,6 +271,10 @@ with type arch_reg = I.arch_reg and type special = I.special | [] -> Warn.fatal "No more special registers" | r::rs -> r,{ st with specials = rs; } + let alloc_special2 st = match st.specials2 with + | [] -> Warn.fatal "No more special registers" + | r::rs -> r,{ st with specials2 = rs; } + let set_friends r rs st = let friends = RegMap.add r rs st.friends in { st with friends; } diff --git a/gen/cycle.ml b/gen/cycle.ml index f2ece8675..c46b76d10 100644 --- a/gen/cycle.ml +++ b/gen/cycle.ml @@ -136,6 +136,7 @@ module Make (O:Config) (E:Edge.S) : let do_morello = O.variant Variant_gen.Morello let do_kvm = Variant_gen.is_kvm O.variant let do_neon = O.variant Variant_gen.Neon + let do_sve = O.variant Variant_gen.SVE type fence = E.fence type edge = E.edge @@ -207,8 +208,8 @@ module Make (O:Config) (E:Edge.S) : sprintf " (ord=%i) (ctag=%i) (cseal=%i) (dep=%i)" e.ord e.ctag e.cseal e.dep else fun _ -> "" - let debug_neon = - if do_neon then + let debug_vector = + if do_neon || do_sve then let pp_one = Code.add_vector O.hexa in fun e -> sprintf " (vecreg={%s})" @@ -231,7 +232,7 @@ module Make (O:Config) (E:Edge.S) : (Code.pp_loc e.loc) (match debug_vec e.cell with | "" -> "" | s -> "cell=[" ^ s ^"] ") - pp_v (debug_tag e) (debug_morello e) (debug_neon e) + pp_v (debug_tag e) (debug_morello e) (debug_vector e) let debug_edge = E.pp_edge diff --git a/gen/noSpecial.ml b/gen/noSpecial.ml index 50cec2f90..44044be20 100644 --- a/gen/noSpecial.ml +++ b/gen/noSpecial.ml @@ -18,3 +18,4 @@ type special let specials = [] +let specials2 = [] diff --git a/gen/noSpecial.mli b/gen/noSpecial.mli index a57469920..2a6c60e7d 100644 --- a/gen/noSpecial.mli +++ b/gen/noSpecial.mli @@ -18,3 +18,4 @@ type special val specials : special list +val specials2 : special list diff --git a/gen/variant_gen.ml b/gen/variant_gen.ml index efc531cc6..1494c5e02 100644 --- a/gen/variant_gen.ml +++ b/gen/variant_gen.ml @@ -38,6 +38,8 @@ type t = | KVM | FullKVM | NoFault (* Neon AArch64 extension *) | Neon +(* Scalable Vector extension (AArch64) *) + | SVE (* Constrained Unpredictable *) | ConstrainedUnpredictable @@ -63,6 +65,7 @@ let parse tag = match Misc.lowercase tag with | "fullkvm" -> Some FullKVM | "nofault" -> Some NoFault | "neon" -> Some Neon +| 
"sve" -> Some SVE | "constrainedunpredictable"|"cu" -> Some ConstrainedUnpredictable | _ -> None @@ -81,6 +84,7 @@ let pp = function | FullKVM -> "FullKvm" | NoFault -> "NoFault" | Neon -> "Neon" + | SVE -> "sve" | ConstrainedUnpredictable -> "ConstrainedUnpredictable" let is_mixed v = v Mixed || v FullMixed diff --git a/gen/variant_gen.mli b/gen/variant_gen.mli index b5898cc52..7df51d79c 100644 --- a/gen/variant_gen.mli +++ b/gen/variant_gen.mli @@ -40,6 +40,8 @@ type t = | NoFault (* Neon AArch64 extension *) | Neon +(* SVE AArch64 extension *) + | SVE (* Constrained Unpredictable, ie generate tests thar may exhibit such behaviours. Typically LDXR / STXR of different size or address. *) | ConstrainedUnpredictable diff --git a/herd/AArch64Arch_herd.ml b/herd/AArch64Arch_herd.ml index d49bbf743..784a95dc1 100644 --- a/herd/AArch64Arch_herd.ml +++ b/herd/AArch64Arch_herd.ml @@ -109,6 +109,14 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) = | I_SWP _| I_SWPBH _| I_SXTW _| I_TLBI _| I_UBFM _ | I_UDF _| I_UNSEAL _ | I_ADDSUBEXT _ | I_ABS _ | I_REV _ | I_EXTR _ | I_MOPL _ + | I_WHILELT _ | I_WHILELE _ | I_WHILELO _ | I_WHILELS _ + | I_UADDV _ + | I_LD1SP _ | I_LD2SP _ | I_LD3SP _ | I_LD4SP _ + | I_ST1SP _ | I_ST2SP _ | I_ST3SP _ | I_ST4SP _ + | I_MOV_SV _ + | I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _ + | I_DUP_SV _ | I_ADD_SV _ | I_PTRUE _ + | I_NEG_SV _ | I_MOVPRFX _ -> true let is_cmodx_restricted_value = @@ -218,6 +226,17 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) = let vs = get_rec 0 in V.Val (Constant.ConcreteVector vs) + let predicate_mask psize = + let mask = match psize with + | 1 -> "0x1" + | 2 -> "0x3" + | 4 -> "0x7" + | 8 -> "0xff" + | _ -> assert false in + V.stringToV mask + + let scalable_mask = neon_mask + let simd_mem_access_size rs = match List.hd rs with | Vreg (_,(_,8)) -> MachSize.Byte | Vreg (_,(_,16)) -> MachSize.Short @@ -252,6 +271,11 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) = | I_LD4 (rs,_,_,_) | I_LD4R (rs,_,_) | I_ST4 (rs,_,_,_) | I_LD4M (rs,_,_) | I_ST4M (rs,_,_) -> Some (simd_mem_access_size rs) + | I_LD1SP (v,_,_,_,_) | I_ST1SP (v,_,_,_,_) + | I_LD2SP (v,_,_,_,_) | I_ST2SP (v,_,_,_,_) + | I_LD3SP (v,_,_,_,_) | I_ST3SP (v,_,_,_,_) + | I_LD4SP (v,_,_,_,_) | I_ST4SP (v,_,_,_,_) -> + Some (tr_simd_variant v) | I_LDRBH (v,_,_,_) | I_LDARBH (v,_,_,_) | I_LDRS ((_,v),_,_,_) | I_STRBH (v,_,_,_) | I_STLRBH (v,_,_) | I_STXRBH (v,_,_,_,_) | I_CASBH (v,_,_,_,_) | I_SWPBH (v,_,_,_,_) @@ -275,6 +299,11 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) = | I_MOVI_S _ | I_MOVI_V _ | I_ADDV _ | I_DUP _ | I_FMOV_TG _ | I_EOR_SIMD _ | I_ADD_SIMD _ | I_ADD_SIMD_S _ | I_UDF _ | I_ADDSUBEXT _ | I_MOPL _ + | I_WHILELT _ | I_WHILELE _ | I_WHILELO _ | I_WHILELS _ + | I_UADDV _ + | I_MOV_SV _ | I_DUP_SV _ | I_ADD_SV _ | I_PTRUE _ + | I_NEG_SV _ | I_MOVPRFX _ + | I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _ -> None let all_regs = @@ -306,6 +335,7 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) = | I_IC _|I_DC _|I_TLBI _ | I_NOP|I_TBZ _|I_TBNZ _ | I_BL _ | I_BLR _ | I_RET _ | I_ERET | I_UDF _ + | I_ST1SP _ | I_ST2SP _ | I_ST3SP _ | I_ST4SP _ -> [] (* For -variant self only ? 
*) | I_LDR (_,r1,r2,MemExt.Imm (_,(PreIdx|PostIdx))) | I_LDRBH (_,r1,r2,MemExt.Imm (_,(PreIdx|PostIdx))) @@ -338,11 +368,22 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) = | I_ADDV (_,r,_) | I_DUP (r,_,_) | I_FMOV_TG (_,r,_,_) + | I_WHILELT (r,_,_,_) | I_WHILELE (r,_,_,_) | I_WHILELO (r,_,_,_) | I_WHILELS (r,_,_,_) + | I_UADDV (_,r,_,_) + | I_MOV_SV (r,_,_) + | I_DUP_SV (r,_,_) | I_ADD_SV (r,_,_) | I_PTRUE (r,_) + | I_NEG_SV (r,_,_) | I_MOVPRFX (r,_,_) + | I_INDEX_SI (r,_,_,_) | I_INDEX_IS (r,_,_,_) | I_INDEX_SS (r,_,_,_) | I_INDEX_II (r,_,_) -> [r] | I_MSR (sr,_) -> [(SysReg sr)] | I_LDXP (_,_,r1,r2,_) -> [r1;r2;] + | I_LD1SP (_,rs,_,_,_) + | I_LD2SP (_,rs,_,_,_) + | I_LD3SP (_,rs,_,_,_) + | I_LD4SP (_,rs,_,_,_) + -> rs | I_LDAP1 _ | I_STL1 _ | I_LD1 _|I_LD1M _|I_LD1R _|I_LD2 _ @@ -403,6 +444,14 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) = | I_CSEL _|I_IC _|I_DC _|I_TLBI _|I_MRS _|I_MSR _ | I_STG _|I_STZG _|I_STZ2G _|I_LDG _|I_UDF _ | I_ADDSUBEXT _|I_MOPL _ + | I_WHILELT _ | I_WHILELE _ | I_WHILELO _ | I_WHILELS _ + | I_UADDV _ + | I_LD1SP _ | I_LD2SP _ | I_LD3SP _ | I_LD4SP _ + | I_ST1SP _ | I_ST2SP _ | I_ST3SP _ | I_ST4SP _ + | I_ADD_SV _ | I_PTRUE _ + | I_NEG_SV _ | I_MOVPRFX _ + | I_MOV_SV _ | I_DUP_SV _ + | I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _ -> MachSize.No include ArchExtra_herd.Make(C) diff --git a/herd/AArch64ParseTest.ml b/herd/AArch64ParseTest.ml index 2b3bfdb71..2b54c61e3 100644 --- a/herd/AArch64ParseTest.ml +++ b/herd/AArch64ParseTest.ml @@ -72,7 +72,7 @@ module Make(Conf:RunTest.Config)(ModelConfig:MemCat.Config) = struct let module AArch64Value = CapabilityValue.Make(ConfMorello) in let module X = AArch64Make(AArch64Value) in X.X.run - else if Conf.variant Variant.Neon then + else if Conf.variant Variant.Neon || Conf.variant Variant.SVE then let module AArch64Value = Uint128Value.Make(ConfMorello) in let module X = AArch64Make(AArch64Value) in X.X.run diff --git a/herd/AArch64Sem.ml b/herd/AArch64Sem.ml index b516db6b2..177f22863 100644 --- a/herd/AArch64Sem.ml +++ b/herd/AArch64Sem.ml @@ -34,6 +34,7 @@ module Make let memtag = C.variant Variant.MemTag let morello = C.variant Variant.Morello let neon = C.variant Variant.Neon + let sve = C.variant Variant.SVE let kvm = C.variant Variant.VMSA let is_branching = kvm && not (C.variant Variant.NoPteBranch) let pte2 = kvm && C.variant Variant.PTE2 @@ -54,6 +55,18 @@ module Make "morello instruction %s require -variant morello" (AArch64.dump_instruction inst) + let check_neon inst = + if not (neon || sve) then + Warn.user_error + "Neon instruction %s requires -variant neon" + (AArch64.dump_instruction inst) + + let check_sve inst = + if not sve then + Warn.user_error + "SVE instruction %s requires -variant sve" + (AArch64.dump_instruction inst) + (* Barrier pretty print *) let barriers = let bs = AArch64Base.do_fold_dmb_dsb false true (fun h t -> h::t) [] @@ -154,7 +167,6 @@ module Make (A.Location_reg (ii.A.proc,r)) ii let read_reg_neon is_data r ii = - if not neon then Warn.user_error "Advanced SIMD instructions require -variant neon" ; let vr = match r with | AArch64Base.SIMDreg _ -> r | AArch64Base.Vreg(vr',_) -> (AArch64Base.SIMDreg vr') @@ -226,7 +238,6 @@ module Make neon_replicate old_val (nelem-1) esize v let write_reg_neon_sz sz r v ii = - if not neon then Warn.user_error "Advanced SIMD instructions require -variant neon" ; let vr = match r with | AArch64Base.SIMDreg _ -> r | AArch64Base.Vreg(vr',_) -> (AArch64Base.SIMDreg vr') @@ -250,6 +261,137 @@ module Make neon_replicate v 
nelem esize v >>= fun new_val -> write_reg_neon_sz sz r new_val ii | _ -> assert false + let predicate_psize r = match r with + | AArch64Base.Preg (_,esize) + (* Infer predicate bitsize from Z register *) + | AArch64Base.Zreg (_,esize) -> esize / 8 + | _ -> assert false + + let predicate_nelem r = match r with + | AArch64Base.Preg (_,esize) + (* Infer predicate elements from Z register *) + | AArch64Base.Zreg (_,esize) -> 128 / esize + | _ -> assert false + + let predicate_count predicate nelem = + let open AArch64 in + match predicate with + | VL1 when nelem >= 1 -> 1 + | VL2 when nelem >= 2 -> 2 + | VL3 when nelem >= 3 -> 3 + | VL4 when nelem >= 4 -> 4 + | VL5 when nelem >= 5 -> 5 + | VL6 when nelem >= 6 -> 6 + | VL7 when nelem >= 7 -> 7 + | VL8 when nelem >= 8 -> 8 + | VL16 when nelem >= 16 -> 16 + | VL32 when nelem >= 32 -> 32 + | VL128 when nelem >= 128 -> 128 + | VL256 when nelem >= 256 -> 256 + | MUL3 -> nelem - (nelem mod 3) + | MUL4 -> nelem - (nelem mod 4) + | ALL -> nelem + | POW2 -> + let rec calc n = + if nelem >= (Int.shift_left 1 n) then calc (n+1) + else Int.shift_left 1 (n-1) in + calc 1 + | _ -> 0 + + let read_reg_predicate is_data r ii = + let vr = match r with + | AArch64Base.Preg(_,_) | AArch64Base.PMreg(_,_) -> r + | _ -> assert false in + let location = A.Location_reg (ii.A.proc,vr) in + M.read_loc is_data (mk_read MachSize.Short Annot.N aexp) location ii + + let predicate_setlane old_val idx psize v = + let mask = V.op1 (Op.LeftShift (idx*psize)) (AArch64.predicate_mask psize) in + let invert = V.op1 Op.Inv mask in + M.op1 (Op.LeftShift (idx*psize)) v >>= fun new_val -> + M.op Op.And invert old_val >>| + M.op Op.And mask new_val >>= fun (v1,v2) -> + M.op Op.Or v1 v2 + + let predicate_getlane cur_val idx psize = + let mask = V.op1 (Op.LeftShift (idx*psize)) (AArch64.predicate_mask psize) in + M.op Op.And mask cur_val >>= fun masked_val -> + M.op1 (Op.LogicalRightShift (idx*psize)) masked_val + + let write_reg_predicate_sz sz r v ii = + let pr = match r with + | AArch64Base.Preg(_,_) -> r + | _ -> assert false in + (* Clear unused register bits (zero extend) *) + M.op1 (Op.Mask sz) v >>= fun v -> + let location = A.Location_reg (ii.A.proc,pr) in + M.write_loc (mk_write MachSize.Short Annot.N aexp Access.REG v) location ii + + let write_reg_predicate = write_reg_predicate_sz MachSize.Short + + let get_predicate_last pred psize idx = + predicate_getlane pred idx psize >>= fun v -> + M.op Op.And v V.one >>= fun last -> + M.op Op.Ne last V.zero + + let get_predicate_any pred psize nelem = + let mask idx = V.op1 (Op.LeftShift (idx*psize)) (AArch64.predicate_mask psize) in + let ops = List.map mask (Misc.interval 0 nelem) in + let allmask = List.fold_right (V.op Op.Or) ops V.zero in + M.op Op.And pred allmask >>= fun all -> + M.op Op.Ne all V.zero + + let scalable_esize r = match r with + | AArch64Base.Zreg (_,esize) -> esize + | _ -> assert false + + let scalable_nelem r = match r with + | AArch64Base.Zreg (_,esize) -> 128 / esize + | _ -> assert false + + let read_reg_scalable is_data r ii = + let vr = match r with + | AArch64Base.Zreg _ -> r + | _ -> assert false in + let location = A.Location_reg (ii.A.proc,vr) in + M.read_loc is_data (mk_read MachSize.S128 Annot.N aexp) location ii + + let scalable_setlane old_val idx esize v = + let mask = V.op1 (Op.LeftShift (idx*esize)) (AArch64.scalable_mask esize) in + let invert = V.op1 Op.Inv mask in + M.op1 (Op.LeftShift (idx*esize)) v >>= fun new_val -> + M.op Op.And invert old_val >>| + M.op Op.And mask new_val >>= fun 
(v1,v2) -> + M.op Op.Or v1 v2 + + let scalable_getlane cur_val idx esize = + let mask = V.op1 (Op.LeftShift (idx*esize)) (AArch64.scalable_mask esize) in + M.op Op.And mask cur_val >>= fun masked_val -> + M.op1 (Op.LogicalRightShift (idx*esize)) masked_val + + let rec scalable_replicate old_val nelem esize v = match nelem with + | 0 -> M.unitT old_val + | _ -> + scalable_setlane old_val (nelem-1) esize v >>= fun old_val -> + scalable_replicate old_val (nelem-1) esize v + + let write_reg_scalable_sz sz r v ii = + let pr = match r with + | AArch64Base.Zreg(_,_) -> r + | _ -> assert false in + (* Clear unused register bits (zero extend) *) + M.op1 (Op.Mask sz) v >>= fun v -> + let location = A.Location_reg (ii.A.proc,pr) in + M.write_loc (mk_write MachSize.S128 Annot.N aexp Access.REG v) location ii + + let write_reg_scalable = write_reg_scalable_sz MachSize.S128 + + let write_reg_scalable_rep r v ii = + let nelem = scalable_nelem r in + let esize = scalable_esize r in + scalable_replicate v nelem esize v >>= fun new_val -> + write_reg_scalable r new_val ii + let do_write_reg_sz mop sz r v ii = match r with | AArch64.ZR -> M.unitT () | _ -> match sz with @@ -1381,6 +1523,7 @@ module Make read_mem_postindexed a_virt op sz Annot.N aexp ac rd rs k a ii) ma ii) + | _ -> assert false let ldr sz = ldr0 (uxt_op sz) sz and ldrsw rd rs e ii = @@ -1543,6 +1686,7 @@ module Make str_simple sz rs rd (get_ea_preindexed rd k ii) ii | Reg (v,ri,sext,s) -> str_simple sz rs rd (get_ea_reg rd v ri sext s ii) ii + | _ -> assert false let stp_wback = @@ -2033,6 +2177,187 @@ module Make reduce (nelem-1) (M.add (V.intToV 0) (V.intToV 0)) >>= fun v -> write_reg_neon_sz sz r1 v ii + let uaddv var r1 p src ii = + let nelem = predicate_nelem src in + let psize = predicate_psize src in + let esize = scalable_esize src in + read_reg_predicate false p ii >>= fun pred -> + get_predicate_any pred psize nelem >>= fun any -> + M.choiceT + any + (read_reg_scalable false src ii >>= fun src -> + let read_active_elem_or_zero idx cur_val = + get_predicate_last pred psize idx >>= fun last -> + M.choiceT + last + (scalable_getlane cur_val idx esize) + mzero + in + let rec reduce cur_val n op = + match n with + | 0 -> op >>| read_active_elem_or_zero n cur_val >>= fun (v1,v2) -> M.add v1 v2 + | _ -> reduce cur_val (n-1) (op >>| read_active_elem_or_zero n cur_val >>= fun (v1,v2) -> M.add v1 v2) + in + reduce src (nelem-1) mzero) + mzero >>= fun v -> + let sz = AArch64Base.tr_simd_variant var in + write_reg_neon_sz sz r1 v ii + + let movprfx dst pg src ii = + let nelem = predicate_nelem src in + let psize = predicate_psize src in + let esize = scalable_esize dst in + let orig = match pg with + | AArch64Base.PMreg (_,AArch64Base.Zero) + -> mzero + | AArch64Base.PMreg (_,AArch64Base.Merge) + -> read_reg_scalable false dst ii + | _ -> assert false + in + orig >>| + read_reg_predicate false pg ii >>= fun (orig,pred) -> + get_predicate_any pred psize nelem >>= fun any -> + M.choiceT + any + (read_reg_scalable false src ii >>= fun src -> + let copy orig cur_val idx = + get_predicate_last pred psize idx >>= fun last -> + M.choiceT + last + (scalable_getlane cur_val idx esize) + (scalable_getlane orig idx esize) + >>= fun v -> + scalable_setlane cur_val idx esize v + in + let rec reduce orig n op = + match n with + | 0 -> op >>= fun old_val -> copy orig old_val n + | _ -> reduce orig (n-1) (op >>= fun old_val -> copy orig old_val n) + in + reduce orig (nelem-1) (M.unitT src)) + (M.unitT orig) >>= fun v -> + write_reg_scalable dst v ii + + let 
neg dst pg src ii = + let nelem = predicate_nelem src in + let psize = predicate_psize src in + let esize = scalable_esize dst in + read_reg_scalable false dst ii >>| + read_reg_predicate false pg ii >>= fun (orig,pred) -> + get_predicate_any pred psize nelem >>= fun any -> + M.choiceT + any + (read_reg_scalable false src ii >>= fun src -> + let negate orig cur_val idx = + get_predicate_last pred psize idx >>= fun last -> + M.choiceT + last + (scalable_getlane cur_val idx esize >>= fun v -> + M.op Op.Sub V.zero v) + (scalable_getlane orig idx esize) + >>= fun v -> + scalable_setlane cur_val idx esize v + in + let rec reduce orig n op = + match n with + | 0 -> op >>= fun old_val -> negate orig old_val n + | _ -> reduce orig (n-1) (op >>= fun old_val -> negate orig old_val n) + in + reduce orig (nelem-1) (M.unitT src)) + (M.unitT orig) >>= fun v -> + write_reg_scalable dst v ii + + let while_op compare unsigned p var r1 r2 ii = + let psize = predicate_psize p in + let nelem = predicate_nelem p in + let sz = AArch64Base.tr_variant var in + let extend = if unsigned then uxt_op sz else sxt_op sz in + read_reg_ord_sz sz r1 ii >>| + read_reg_ord_sz sz r2 ii >>= fun (v1,v2) -> + let rec repeat old_val idx v1 v2 last = + if idx < nelem then + extend v1 >>| + extend v2 >>= fun (v1,v2) -> + compare v1 v2 >>= fun (cond) -> + M.op Op.And last cond >>= fun last -> + predicate_setlane old_val idx psize last >>| + M.add v1 V.one >>= fun (old_val, v1) -> + repeat old_val (idx+1) v1 v2 last + else M.unitT old_val + in + repeat V.zero 0 v1 v2 V.one >>= fun (new_val) -> + write_reg_predicate p new_val ii >>| + ( let last idx = get_predicate_last new_val psize idx in + (* First active *) + let n = last 0 >>= fun v -> + M.op1 (Op.LeftShift 3) v in + (* Non active *) + let z = + let rec reduce idx op = match idx with + | 0 -> op >>| last idx >>= fun (v1,v2) -> M.op Op.Or v1 v2 + | _ -> reduce (idx-1) (op >>| last idx >>= fun (v1,v2) -> M.op Op.Or v1 v2) + in + reduce (nelem-1) mzero >>= fun v -> + M.op1 Op.Not v >>= fun v -> + M.op1 (Op.LeftShift 2) v in + (* Not last active *) + let c = last (nelem-1) >>= fun v -> + M.op1 Op.Not v >>= fun v -> + M.op1 (Op.LeftShift 1) v in + (* v always 0 *) + let flags = n >>| z >>= fun (v1,v2) -> + M.op Op.Or v1 v2 >>| c >>= fun (v1,v2) -> + M.op Op.Or v1 v2 in + flags >>= fun flags -> write_reg AArch64Base.NZCV flags ii + ) >>! new_val + + let ptrue p pattern ii = + let psize = predicate_psize p in + let nelem = predicate_nelem p in + let count = predicate_count pattern nelem in + let rec repeat old_val idx = + if idx < nelem then + let v = if idx < count then V.one else V.zero in + predicate_setlane old_val idx psize v >>= fun old_val -> + repeat old_val (idx+1) + else M.unitT old_val + in + repeat V.zero 0 >>= fun (new_val) -> + write_reg_predicate p new_val ii >>!
new_val + + let mov_sv r k shift ii = + let open AArch64Base in + begin match shift with + | S_NOEXT + | S_LSL(0) -> + M.unitT (V.intToV k) + | S_LSL(8 as amount) -> + M.op1 (Op.LeftShift amount) (V.intToV k) + | S_LSL(n) -> + Warn.fatal + "illegal shift immediate %d in instruction mov" + n + | s -> + Warn.fatal + "illegal shift operand %s in instruction mov" + (pp_barrel_shift "," s pp_imm) + end + >>= fun v -> write_reg_scalable_rep r v ii + + let index r v1 v2 ii = + let nelem = scalable_nelem r in + let esize = scalable_esize r in + let increment cur_val idx o = + M.add v1 o >>= fun v -> scalable_setlane cur_val idx esize v in + let rec reduce n op = + let i = V.op Op.Mul (V.intToV n) v2 in + match n with + | 0 -> op >>= fun old_val -> increment old_val n i + | _ -> reduce (n-1) (op >>= fun old_val -> increment old_val n i) + in + reduce (nelem-1) mzero >>= fun v -> + write_reg_scalable r v ii + (******************************) (* Move constant instructions *) (******************************) @@ -2223,6 +2548,125 @@ module Make let ops = List.mapi op rlist in List.fold_right (>>::) ops (M.unitT [[()]]) + let load_predicated_elem_or_zero_m sz p ma rlist ii = + let r = List.hd rlist in + let nelem = scalable_nelem r in + let psize = predicate_psize r in + let esize = scalable_esize r in + let nregs = List.length rlist in + read_reg_predicate false p ii >>= fun pred -> + get_predicate_any pred psize nelem >>= fun any -> + M.choiceT + any + (ma >>= fun addr -> + let op i r = + let calc_offset idx = (V.intToV ((idx*nregs+i) * MachSize.nbytes sz)) in + let load idx = + let offset = calc_offset idx in + get_predicate_last pred psize idx >>= fun last -> + M.choiceT + last + (M.add addr offset >>= fun addr -> + do_read_mem_ret sz Annot.N aexp Access.VIR addr ii) + mzero >>= fun v -> + M.op1 (Op.LeftShift (idx*esize)) v in + let rec reduce idx op = + match idx with + | 0 -> op >>| load idx >>= fun (v1,v2) -> M.op Op.Or v1 v2 + | _ -> reduce (idx-1) (op >>| load idx >>= fun (v1,v2) -> M.op Op.Or v1 v2) + in + reduce (nelem-1) mzero >>= fun v -> + write_reg_scalable r v ii + in + let ops = List.mapi op rlist in + List.fold_right (>>::) ops (M.unitT [()])) + (let ops = List.map (fun r -> write_reg_scalable r V.zero ii) rlist in + List.fold_right (>>::) ops (M.unitT [()])) + + let store_predicated_elem_or_merge_m sz p ma rlist ii = + let r = List.hd rlist in + let nelem = scalable_nelem r in + let psize = predicate_psize r in + let esize = scalable_esize r in + let nregs = List.length rlist in + read_reg_predicate false p ii >>= fun pred -> + get_predicate_any pred psize nelem >>= fun any -> + M.choiceT + any + (ma >>= fun addr -> + let op i r = + read_reg_scalable true r ii >>= fun v -> + let calc_offset idx = (V.intToV ((idx*nregs+i) * MachSize.nbytes sz)) in + let store idx = + let offset = calc_offset idx in + get_predicate_last pred psize idx >>= fun last -> + M.choiceT + last + (M.add addr offset >>| scalable_getlane v idx esize >>= fun (addr,v) -> + write_mem sz aexp Access.VIR addr v ii) + (M.unitT ()) + in + let rec reduce idx op = + match idx with + | 0 -> store idx >>:: op + | _ -> reduce (idx-1) (store idx >>:: op) + in + reduce (nelem-1) (M.unitT [()]) in + let ops = List.mapi op rlist in + List.fold_right (>>::) ops (M.unitT [[()]])) + (M.unitT [[()]]) + + let load_gather_predicated_elem_or_zero sz p ma mo rs e k ii = + let r = List.hd rs in + let psize = predicate_psize r in + let nelem = scalable_nelem r in + let esize = scalable_esize r in + read_reg_predicate false p ii >>= fun
pred -> + get_predicate_any pred psize nelem >>= fun any -> + M.choiceT + any + (ma >>| mo >>= fun (base,offsets) -> + let load idx = + get_predicate_last pred psize idx >>= fun last -> + M.choiceT + last + (scalable_getlane offsets idx esize >>= memext_sext e k >>= fun o -> + M.add o base >>= fun addr -> + do_read_mem_ret sz Annot.N aexp Access.VIR addr ii) + mzero + >>= fun v -> M.op1 (Op.LeftShift (idx*esize)) v + in + let rec reduce idx op = + match idx with + | 0 -> op >>| load idx >>= fun (v1,v2) -> M.op Op.Or v1 v2 + | _ -> reduce (idx-1) (op >>| load idx >>= fun (v1,v2) -> M.op Op.Or v1 v2) + in + reduce (nelem-1) mzero) + mzero >>= fun v -> + write_reg_scalable r v ii + + let store_scatter_predicated_elem_or_merge sz p ma mo rs e k ii = + let r = List.hd rs in + let psize = predicate_psize r in + let nelem = scalable_nelem r in + let esize = scalable_esize r in + read_reg_predicate false p ii >>= fun pred -> + get_predicate_any pred psize nelem >>= fun any -> + M.choiceT + any + (ma >>| mo >>| read_reg_scalable true r ii >>= fun ((base,offsets),v) -> + let op idx = + let store = + (scalable_getlane offsets idx esize >>= memext_sext e k >>= fun o -> + M.add o base) >>| + scalable_getlane v idx esize >>= fun (addr,v) -> + write_mem sz aexp Access.VIR addr v ii in + get_predicate_last pred psize idx >>= fun last -> + M.choiceT last store (M.unitT ()) in + let ops = List.map op (Misc.interval 0 nelem) in + List.fold_right (>>::) ops (M.unitT [()])) + (M.unitT [()]) + (* Data cache operations *) let dc_loc op a ii = let mk_act loc = Act.CMO (AArch64.CMO.DC op,Some loc) in @@ -2565,46 +3009,60 @@ module Make (* Neon operations *) | I_ADDV(var,r1,r2) -> + check_neon inst; !(addv var r1 r2 ii) | I_DUP(r1,var,r2) -> + check_neon inst; !(let sz = tr_variant var in read_reg_ord_sz sz r2 ii >>= fun v -> write_reg_neon_rep (neon_sz r1) r1 v ii) | I_FMOV_TG(_,r1,_,r2) -> + check_neon inst; !(read_reg_neon false r2 ii >>= fun v -> write_reg r1 v ii) | I_MOV_VE(r1,i1,r2,i2) -> + check_neon inst; !(read_reg_neon_elem false r2 i2 ii >>= fun v -> write_reg_neon_elem MachSize.S128 r1 i1 v ii) | I_MOV_FG(r1,i,var,r2) -> + check_neon inst; !(let sz = tr_variant var in read_reg_ord_sz sz r2 ii >>= fun v -> write_reg_neon_elem MachSize.S128 r1 i v ii) | I_MOV_TG(_,r1,r2,i) -> + check_neon inst; !(read_reg_neon_elem false r2 i ii >>= fun v -> write_reg r1 v ii) | I_MOV_V(r1,r2) -> + check_neon inst; !(read_reg_neon false r2 ii >>= fun v -> write_reg_neon r1 v ii) | I_MOV_S(var,r1,r2,i) -> + check_neon inst; !(let sz = tr_simd_variant var in read_reg_neon_elem false r2 i ii >>= fun v -> write_reg_neon_sz sz r1 v ii) | I_MOVI_V(r,k,shift) -> + check_neon inst; !(movi_v r k shift ii) | I_MOVI_S(var,r,k) -> + check_neon inst; !(movi_s var r k ii) | I_EOR_SIMD(r1,r2,r3) -> + check_neon inst; let size = neon_sz r1 in !(simd_op EOR size r1 r2 r3 ii) | I_ADD_SIMD(r1,r2,r3) -> + check_neon inst; let size = neon_sz r1 in !(simd_op ADD size r1 r2 r3 ii) | I_ADD_SIMD_S(r1,r2,r3) -> + check_neon inst; !(simd_op ADD MachSize.Quad r1 r2 r3 ii) (* Neon loads and stores *) | I_LDAP1(rs,i,rA,kr) -> + check_neon inst; !!!(read_reg_ord rA ii >>= fun addr -> (mem_ss (load_elem_ldar MachSize.S128 i) addr rs ii >>| post_kr rA addr kr ii)) @@ -2612,6 +3070,7 @@ module Make | I_LD2(rs,i,rA,kr) | I_LD3(rs,i,rA,kr) | I_LD4(rs,i,rA,kr) -> + check_neon inst; !!!(read_reg_ord rA ii >>= fun addr -> (mem_ss (load_elem MachSize.S128 i) addr rs ii >>| post_kr rA addr kr ii)) @@ -2619,20 +3078,24 @@ module Make | I_LD2R(rs,rA,kr) | 
I_LD3R(rs,rA,kr) | I_LD4R(rs,rA,kr) -> + check_neon inst; !!!(read_reg_ord rA ii >>= fun addr -> (mem_ss (load_elem_rep MachSize.S128) addr rs ii >>| post_kr rA addr kr ii)) | I_LD1M(rs,rA,kr) -> + check_neon inst; !!(read_reg_ord rA ii >>= fun addr -> (load_m_contigous addr rs ii >>| post_kr rA addr kr ii)) | I_LD2M(rs,rA,kr) | I_LD3M(rs,rA,kr) | I_LD4M(rs,rA,kr) -> + check_neon inst; !!(read_reg_ord rA ii >>= fun addr -> (load_m addr rs ii >>| post_kr rA addr kr ii)) | I_STL1(rs,i,rA,kr) -> + check_neon inst; !!!(read_reg_ord rA ii >>= fun addr -> (mem_ss (store_elem_stlr i) addr rs ii >>| post_kr rA addr kr ii)) @@ -2640,69 +3103,85 @@ module Make | I_ST2(rs,i,rA,kr) | I_ST3(rs,i,rA,kr) | I_ST4(rs,i,rA,kr) -> + check_neon inst; !!!(read_reg_ord rA ii >>= fun addr -> (mem_ss (store_elem i) addr rs ii >>| post_kr rA addr kr ii)) | I_ST1M(rs,rA,kr) -> + check_neon inst; !!!!(read_reg_ord rA ii >>= fun addr -> (store_m_contigous addr rs ii >>| post_kr rA addr kr ii)) | I_ST2M(rs,rA,kr) | I_ST3M(rs,rA,kr) | I_ST4M(rs,rA,kr) -> + check_neon inst; !!!!(read_reg_ord rA ii >>= fun addr -> (store_m addr rs ii >>| post_kr rA addr kr ii)) | I_LDR_SIMD(var,r1,rA,MemExt.Reg(v,kr,sext,s)) -> + check_neon inst; let access_size = tr_simd_variant var in get_ea_reg rA v kr sext s ii >>= fun addr -> simd_ldr access_size addr r1 ii >>= B.next1T | I_LDR_SIMD(var,r1,rA,MemExt.Imm (k,Idx)) -> + check_neon inst; let access_size = tr_simd_variant var in get_ea_idx rA k ii >>= fun addr -> simd_ldr access_size addr r1 ii >>= B.next1T | I_LDR_SIMD(var,r1,rA,MemExt.Imm (k,PreIdx)) -> + check_neon inst; let access_size = tr_simd_variant var in get_ea_preindexed rA k ii >>= fun addr -> simd_ldr access_size addr r1 ii >>= B.next1T | I_LDR_SIMD(var,r1,rA,MemExt.Imm (k,PostIdx)) -> + check_neon inst; let access_size = tr_simd_variant var in read_reg_ord rA ii >>= fun addr -> simd_ldr access_size addr r1 ii >>| post_kr rA addr (K k) ii >>= B.next2T | I_LDUR_SIMD(var,r1,rA,k) -> + check_neon inst; let access_size = tr_simd_variant var in get_ea rA (K k) S_NOEXT ii >>= fun addr -> simd_ldr access_size addr r1 ii >>= B.next1T | I_LDAPUR_SIMD(var,r1,rA,k) -> + check_neon inst; let access_size = tr_simd_variant var in get_ea rA (K k) S_NOEXT ii >>= fun addr -> simd_ldar access_size addr r1 ii >>= B.next1T | I_STR_SIMD(var,r1,rA,MemExt.Reg (v,kr,sext,s)) -> + check_neon inst; let access_size = tr_simd_variant var in let ma = get_ea_reg rA v kr sext s ii in simd_str access_size ma r1 ii | I_STR_SIMD(var,r1,rA,MemExt.Imm (k,Idx)) -> + check_neon inst; let access_size = tr_simd_variant var in let ma = get_ea_idx rA k ii in simd_str access_size ma r1 ii | I_STR_SIMD(var,r1,rA,MemExt.Imm (k,PreIdx)) -> + check_neon inst; let access_size = tr_simd_variant var in let ma = get_ea_preindexed rA k ii in simd_str access_size ma r1 ii | I_STR_SIMD(var,r1,rA,MemExt.Imm (k,PostIdx)) -> + check_neon inst; let access_size = tr_simd_variant var in let ma = read_reg_ord rA ii in simd_str_p access_size ma r1 rA (K k) ii | I_STUR_SIMD(var,r1,rA,k) -> + check_neon inst; let access_size = tr_simd_variant var in let ma = get_ea_idx rA k ii in simd_str access_size ma r1 ii | I_STLUR_SIMD(var,r1,rA,k) -> + check_neon inst; let access_size = tr_simd_variant var in let ma = get_ea_idx rA k ii in simd_stlr access_size ma r1 ii | I_LDP_SIMD(tnt,var,r1,r2,r3,idx) -> + check_neon inst; begin match idx with | k,Idx -> @@ -2718,6 +3197,7 @@ module Make fun (b,()) -> M.unitT b end | I_STP_SIMD(tnt,var,r1,r2,r3,idx) -> + check_neon inst; begin match idx with | 
k,Idx -> @@ -2732,6 +3212,118 @@ module Make post_kr r3 addr (K k) ii) >>= fun (b,()) -> M.unitT b end + (* Scalable vector instructions *) + | I_LD1SP(var,rs,p,rA,MemExt.Imm (k,Idx)) + | I_LD2SP(var,rs,p,rA,MemExt.Imm (k,Idx)) + | I_LD3SP(var,rs,p,rA,MemExt.Imm (k,Idx)) + | I_LD4SP(var,rs,p,rA,MemExt.Imm (k,Idx)) -> + check_sve inst; + !!!(let sz = tr_simd_variant var in + let ma = get_ea_idx rA k ii in + load_predicated_elem_or_zero_m sz p ma rs ii >>| + M.unitT ()) + | I_LD1SP(var,rs,p,rA,MemExt.Reg (V64,rM,MemExt.LSL,s)) + | I_LD2SP(var,rs,p,rA,MemExt.Reg (V64,rM,MemExt.LSL,s)) + | I_LD3SP(var,rs,p,rA,MemExt.Reg (V64,rM,MemExt.LSL,s)) + | I_LD4SP(var,rs,p,rA,MemExt.Reg (V64,rM,MemExt.LSL,s)) -> + check_sve inst; + !!!(let sz = tr_simd_variant var in + let ma = get_ea_reg rA V64 rM MemExt.LSL s ii in + load_predicated_elem_or_zero_m sz p ma rs ii >>| + M.unitT ()) + | I_LD1SP (var,rs,p,rA,MemExt.ZReg (rM,sext,s)) -> + check_sve inst; + !(let sz = tr_simd_variant var in + let ma = read_reg_ord rA ii in + let mo = read_reg_scalable false rM ii in + load_gather_predicated_elem_or_zero sz p ma mo rs sext s ii) + | I_ST1SP(var,rs,p,rA,MemExt.Imm (k,Idx)) + | I_ST2SP(var,rs,p,rA,MemExt.Imm (k,Idx)) + | I_ST3SP(var,rs,p,rA,MemExt.Imm (k,Idx)) + | I_ST4SP(var,rs,p,rA,MemExt.Imm (k,Idx)) -> + check_sve inst; + !!!!(let sz = tr_simd_variant var in + let ma = get_ea_idx rA k ii in + store_predicated_elem_or_merge_m sz p ma rs ii >>| + M.unitT ()) + | I_ST1SP(var,rs,p,rA,MemExt.Reg (V64,rM,MemExt.LSL,s)) + | I_ST2SP(var,rs,p,rA,MemExt.Reg (V64,rM,MemExt.LSL,s)) + | I_ST3SP(var,rs,p,rA,MemExt.Reg (V64,rM,MemExt.LSL,s)) + | I_ST4SP(var,rs,p,rA,MemExt.Reg (V64,rM,MemExt.LSL,s)) -> + check_sve inst; + !!!!(let sz = tr_simd_variant var in + let ma = get_ea_reg rA V64 rM MemExt.LSL s ii in + store_predicated_elem_or_merge_m sz p ma rs ii >>| + M.unitT ()) + | I_ST1SP (var,rs,p,rA,MemExt.ZReg (rM,sext,s)) -> + check_sve inst; + !!!(let sz = tr_simd_variant var in + let ma = read_reg_ord rA ii in + let mo = read_reg_scalable false rM ii in + store_scatter_predicated_elem_or_merge sz p ma mo rs sext s ii >>| + M.unitT ()) + | I_PTRUE(p,pattern) -> + check_sve inst; + ptrue p pattern ii + >>= nextSet p + | I_WHILELT(p,var,r1,r2) -> + check_sve inst; + while_op (M.op Op.Lt) false p var r1 r2 ii >>= nextSet p + | I_WHILELO(p,var,r1,r2) -> + check_sve inst; + while_op (M.op Op.Lt) true p var r1 r2 ii >>= nextSet p + | I_WHILELE(p,var,r1,r2) -> + check_sve inst; + while_op (M.op Op.Le) false p var r1 r2 ii >>= nextSet p + | I_WHILELS(p,var,r1,r2) -> + check_sve inst; + while_op (M.op Op.Le) true p var r1 r2 ii >>= nextSet p + | I_ADD_SV (r1,r2,r3) -> + check_sve inst; + !(read_reg_scalable false r3 ii >>| + read_reg_scalable false r2 ii >>= fun (v1,v2) -> + M.add v1 v2 >>= fun v -> + write_reg_scalable r1 v ii) + | I_UADDV(var,v,p,z) -> + check_sve inst; + !(uaddv var v p z ii) + | I_MOVPRFX(r1,pg,r2) -> + check_sve inst; + !(movprfx r1 pg r2 ii) + | I_NEG_SV(r1,pg,r2) -> + check_sve inst; + !(neg r1 pg r2 ii) + | I_MOV_SV(r,k,shift) -> + check_sve inst; + !(mov_sv r k shift ii) + | I_DUP_SV(r1,var,r2) -> + check_sve inst; + !(let sz = tr_variant var in + read_reg_ord_sz sz r2 ii >>= fun v -> + write_reg_scalable_rep r1 v ii) + | I_INDEX_SI (r1,var,r2,k) -> + check_sve inst; + !(let sz = tr_variant var in + let v2 = V.intToV k in + read_reg_ord_sz sz r2 ii >>= fun v1 -> + index r1 v1 v2 ii) + | I_INDEX_IS (r1,var,k,r2) -> + check_sve inst; + !(let sz = tr_variant var in + let v1 = V.intToV k in + read_reg_ord_sz 
sz r2 ii >>= fun v2 -> + index r1 v1 v2 ii) + | I_INDEX_SS (r1,var,r2,r3) -> + check_sve inst; + !(let sz = tr_variant var in + read_reg_ord_sz sz r2 ii >>| + read_reg_ord_sz sz r3 ii >>= fun (v1,v2) -> + index r1 v1 v2 ii) + | I_INDEX_II (r1,k1,k2) -> + check_sve inst; + !(let v1 = V.intToV k1 in + let v2 = V.intToV k2 in + index r1 v1 v2 ii) (* Morello instructions *) | I_ALIGND(rd,rn,k) -> check_morello inst ; @@ -3135,7 +3727,10 @@ module Make m_fault >>| set_elr_el1 lbl_v ii >>! B.Fault [AArch64Base.elr_el1, lbl_v] (* Cannot handle *) (* | I_BL _|I_BLR _|I_BR _|I_RET _ *) - | (I_STG _|I_STZG _|I_STZ2G _) as i -> + | (I_STG _|I_STZG _|I_STZ2G _ + | I_LDR_SIMD _| I_STR_SIMD _ + | I_LD1SP _| I_LD2SP _| I_LD3SP _| I_LD4SP _ + | I_ST1SP _|I_ST2SP _|I_ST3SP _|I_ST4SP _) as i -> Warn.fatal "illegal instruction: %s" (AArch64.dump_instruction i) (* Compute a safe set of instructions that can diff --git a/herd/libdir/aarch64util.cat b/herd/libdir/aarch64util.cat index b117f3f9f..50df3229c 100644 --- a/herd/libdir/aarch64util.cat +++ b/herd/libdir/aarch64util.cat @@ -132,4 +132,8 @@ flag ~empty (if "vmsa" && "ifetch" then _ else 0) flag ~empty (if "memtag" && "ifetch" then _ else 0) as combining-memtag-and-ifetch-is-not-supported +(* Flag the runs of herd7 with work-in-progress features *) +flag ~empty (if "sve" then _ else 0) + as Scalable-Vector-Extensions-is-work-in-progress + include "aarch64loc.cat" diff --git a/herd/tests/instructions/AArch64.sve/V01.litmus b/herd/tests/instructions/AArch64.sve/V01.litmus new file mode 100644 index 000000000..c28c2ef20 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V01.litmus @@ -0,0 +1,13 @@ +AArch64 V01 +(* Add vectors (unpredicated) *) +{ +} +P0 ; + MOV Z0.S, #1 ; + MOV Z1.S, #2 ; + ADD Z2.S,Z1.S,Z0.S ; + MOV Z3.D, #3 ; + MOV Z4.D, #4 ; + ADD Z5.D,Z4.D,Z3.D ; + +forall 0:V2.4S={3,3,3,3} /\ 0:V5.4S={7,0,7,0} diff --git a/herd/tests/instructions/AArch64.sve/V01.litmus.expected b/herd/tests/instructions/AArch64.sve/V01.litmus.expected new file mode 100644 index 000000000..20f7a9198 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V01.litmus.expected @@ -0,0 +1,11 @@ +Test V01 Required +States 1 +0:V2.4S={3,3,3,3}; 0:V5.4S={7,0,7,0}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V2.4S={3,3,3,3} /\ 0:V5.4S={7,0,7,0}) +Observation V01 Always 1 0 +Hash=dd05dffbd617d9f8658242b89d30bec8 + diff --git a/herd/tests/instructions/AArch64.sve/V02.litmus b/herd/tests/instructions/AArch64.sve/V02.litmus new file mode 100644 index 000000000..d5ff66ef6 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V02.litmus @@ -0,0 +1,10 @@ +AArch64 V02 +(* Broadcast general-purpose register to vector elements (unpredicated) *) +{ +} +P0 ; + MOV W0,#4 ; + DUP Z0.H, W0 ; + DUP Z1.S, W0 ; + +forall 0:V0.4S={0x40004,0x40004,0x40004,0x40004} /\ 0:V1.4S={4,4,4,4} diff --git a/herd/tests/instructions/AArch64.sve/V02.litmus.expected b/herd/tests/instructions/AArch64.sve/V02.litmus.expected new file mode 100644 index 000000000..294fff164 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V02.litmus.expected @@ -0,0 +1,11 @@ +Test V02 Required +States 1 +0:V0.4S={262148,262148,262148,262148}; 0:V1.4S={4,4,4,4}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V0.4S={262148,262148,262148,262148} /\ 0:V1.4S={4,4,4,4}) +Observation V02 Always 1 0 +Hash=de76ac983b66dc71631de247107dea60 + diff --git 
a/herd/tests/instructions/AArch64.sve/V03.litmus b/herd/tests/instructions/AArch64.sve/V03.litmus new file mode 100644 index 000000000..966ae192a --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V03.litmus @@ -0,0 +1,13 @@ +AArch64 V03 +(* Initialise predicate from named constraint *) +{ + int x[4]; + + 0:X0=x; +} +P0 ; + PTRUE P0.S,VL3 ; + MOV Z0.S,#1 ; + ST1W {Z0.S},P0,[X0]; + +forall(x = {1,1,1,0}) diff --git a/herd/tests/instructions/AArch64.sve/V03.litmus.expected b/herd/tests/instructions/AArch64.sve/V03.litmus.expected new file mode 100644 index 000000000..7cf15e89b --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V03.litmus.expected @@ -0,0 +1,11 @@ +Test V03 Required +States 1 +x={1,1,1,0}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (x={1,1,1,0}) +Observation V03 Always 1 0 +Hash=4d6c70541c7e2576a5148e43c261e804 + diff --git a/herd/tests/instructions/AArch64.sve/V04.litmus b/herd/tests/instructions/AArch64.sve/V04.litmus new file mode 100644 index 000000000..6deed91eb --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V04.litmus @@ -0,0 +1,13 @@ +AArch64 V04 +(* Contiguous load unsigned bytes to vector (scalar index), 32-bit element *) +{ + uint8_t x[4] = {1,2,3,4}; + + 0:X0=x; +} +P0 ; + MOV X1,#0 ; + PTRUE P0.S,VL4 ; + LD1B {Z1.S},P0/Z,[X0,X1] ; + +forall(0:V1.4S = {1,2,3,4}) diff --git a/herd/tests/instructions/AArch64.sve/V04.litmus.expected b/herd/tests/instructions/AArch64.sve/V04.litmus.expected new file mode 100644 index 000000000..58a584dc9 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V04.litmus.expected @@ -0,0 +1,11 @@ +Test V04 Required +States 1 +0:V1.4S={1,2,3,4}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V1.4S={1,2,3,4}) +Observation V04 Always 1 0 +Hash=6c05c2f57e2f2a230130f55ecf9803c1 + diff --git a/herd/tests/instructions/AArch64.sve/V05.litmus b/herd/tests/instructions/AArch64.sve/V05.litmus new file mode 100644 index 000000000..8dab731b9 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V05.litmus @@ -0,0 +1,13 @@ +AArch64 V05 +(* Contiguous load two-word structures to two vectors (scalar index) *) +{ + uint16_t x[8] = {1,2,0,0,0,0,0,0}; + + 0:X0=x; +} +P0 ; + MOV X2,#0 ; + PTRUE P0.H,VL1 ; + LD2H {Z0.H,Z1.H},P0/Z,[X0,X2,LSL #1] ; + +forall(0:V0.4S = {1,0,0,0} /\ 0:V1.4S = {2,0,0,0}) diff --git a/herd/tests/instructions/AArch64.sve/V05.litmus.expected b/herd/tests/instructions/AArch64.sve/V05.litmus.expected new file mode 100644 index 000000000..77c4088ab --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V05.litmus.expected @@ -0,0 +1,11 @@ +Test V05 Required +States 1 +0:V0.4S={1,0,0,0}; 0:V1.4S={2,0,0,0}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V0.4S={1,0,0,0} /\ 0:V1.4S={2,0,0,0}) +Observation V05 Always 1 0 +Hash=82b3428acf9418cdbdbcd1cac3a5204b + diff --git a/herd/tests/instructions/AArch64.sve/V06.litmus b/herd/tests/instructions/AArch64.sve/V06.litmus new file mode 100644 index 000000000..9d5a1cba2 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V06.litmus @@ -0,0 +1,13 @@ +AArch64 V06 +(* Contiguous load three-word structures to three vectors (scalar index) *) +{ + int x[12] = {1,2,3,1,2,3,1,2,3,1,2,3}; + + 0:X0=x; +} +P0 ; + MOV X2,#0 ; + PTRUE P0.S,VL4 ; + LD3W {Z0.S,Z1.S,Z2.S},P0/Z,[X0,X2,LSL #2] ; + +forall(0:V0.4S = {1,1,1,1} /\ 0:V1.4S = {2,2,2,2} /\ 0:V2.4S = {3,3,3,3}) diff --git 
a/herd/tests/instructions/AArch64.sve/V06.litmus.expected b/herd/tests/instructions/AArch64.sve/V06.litmus.expected new file mode 100644 index 000000000..2e43efd96 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V06.litmus.expected @@ -0,0 +1,11 @@ +Test V06 Required +States 1 +0:V0.4S={1,1,1,1}; 0:V1.4S={2,2,2,2}; 0:V2.4S={3,3,3,3}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V0.4S={1,1,1,1} /\ 0:V1.4S={2,2,2,2} /\ 0:V2.4S={3,3,3,3}) +Observation V06 Always 1 0 +Hash=0b0d64e1c8f343cdce7976205bc08f2b + diff --git a/herd/tests/instructions/AArch64.sve/V07.litmus b/herd/tests/instructions/AArch64.sve/V07.litmus new file mode 100644 index 000000000..42c7395a3 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V07.litmus @@ -0,0 +1,12 @@ +AArch64 V07 +(* Contiguous load four-word structures to four vectors (scalar index) *) +{ + uint64_t x[8] = {1,2,3,4,1,2,3,4}; + + 0:X0=x; +} +P0 ; + MOV X2,#0 ; + PTRUE P0.D,VL2 ; + LD4D {Z0.D,Z1.D,Z2.D,Z3.D},P0/Z,[X0,X2,LSL #3]; +forall(0:V0.4S = {1,0,1,0} /\ 0:V1.4S = {2,0,2,0} /\ 0:V2.4S = {3,0,3,0} /\ 0:V3.4S = {4,0,4,0}) diff --git a/herd/tests/instructions/AArch64.sve/V07.litmus.expected b/herd/tests/instructions/AArch64.sve/V07.litmus.expected new file mode 100644 index 000000000..b2312ff2b --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V07.litmus.expected @@ -0,0 +1,11 @@ +Test V07 Required +States 1 +0:V0.4S={1,0,1,0}; 0:V1.4S={2,0,2,0}; 0:V2.4S={3,0,3,0}; 0:V3.4S={4,0,4,0}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V0.4S={1,0,1,0} /\ 0:V1.4S={2,0,2,0} /\ 0:V2.4S={3,0,3,0} /\ 0:V3.4S={4,0,4,0}) +Observation V07 Always 1 0 +Hash=b6883bda119e7c00f9493ab4b2a289a8 + diff --git a/herd/tests/instructions/AArch64.sve/V08.litmus b/herd/tests/instructions/AArch64.sve/V08.litmus new file mode 100644 index 000000000..856ff5b81 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V08.litmus @@ -0,0 +1,9 @@ +AArch64 V08 +(* Broadcast signed immediate to vector elements (unpredicated) *) +{ +} +P0 ; + MOV Z0.S,#4 ; + MOV Z1.B,#1 ; + +forall 0:V0.4S={4,4,4,4} /\ 0:V1.4S={0x1010101,0x1010101,0x1010101,0x1010101} diff --git a/herd/tests/instructions/AArch64.sve/V08.litmus.expected b/herd/tests/instructions/AArch64.sve/V08.litmus.expected new file mode 100644 index 000000000..fa1a3572c --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V08.litmus.expected @@ -0,0 +1,11 @@ +Test V08 Required +States 1 +0:V0.4S={4,4,4,4}; 0:V1.4S={16843009,16843009,16843009,16843009}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V0.4S={4,4,4,4} /\ 0:V1.4S={16843009,16843009,16843009,16843009}) +Observation V08 Always 1 0 +Hash=325848570958ac8e2b931cf1c48564dd + diff --git a/herd/tests/instructions/AArch64.sve/V09.litmus b/herd/tests/instructions/AArch64.sve/V09.litmus new file mode 100644 index 000000000..535dca123 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V09.litmus @@ -0,0 +1,14 @@ +AArch64 V09 +(* Contiguous store words from vector (scalar index) *) +{ + uint8_t x[4]; + + 0:X0=x; +} +P0 ; + MOV Z1.S,#1 ; + MOV X1,#0 ; + PTRUE P0.H,VL4 ; + ST1B {Z1.H},P0,[X0,X1] ; + +forall x={1,0,1,0} diff --git a/herd/tests/instructions/AArch64.sve/V09.litmus.expected b/herd/tests/instructions/AArch64.sve/V09.litmus.expected new file mode 100644 index 000000000..30792c89c --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V09.litmus.expected 
@@ -0,0 +1,11 @@ +Test V09 Required +States 1 +x={1,0,1,0}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (x={1,0,1,0}) +Observation V09 Always 1 0 +Hash=b5926a609acc60970724fd0fd2512677 + diff --git a/herd/tests/instructions/AArch64.sve/V10.litmus b/herd/tests/instructions/AArch64.sve/V10.litmus new file mode 100644 index 000000000..c826f61ad --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V10.litmus @@ -0,0 +1,15 @@ +AArch64 V10 +(* Contiguous store two-word structures from two vectors (scalar index) *) +{ + uint16_t x[8]; + + 0:X0=x; +} +P0 ; + MOV Z1.H,#1 ; + MOV Z2.H,#2 ; + MOV X1,#0 ; + PTRUE P0.H,VL4 ; + ST2H {Z1.H,Z2.H},P0,[X0,X1, LSL #1] ; + +forall x={1,2,1,2,1,2,1,2} diff --git a/herd/tests/instructions/AArch64.sve/V10.litmus.expected b/herd/tests/instructions/AArch64.sve/V10.litmus.expected new file mode 100644 index 000000000..9b094a548 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V10.litmus.expected @@ -0,0 +1,11 @@ +Test V10 Required +States 1 +x={1,2,1,2,1,2,1,2}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (x={1,2,1,2,1,2,1,2}) +Observation V10 Always 1 0 +Hash=b9d08a35b5381eeb0056a1674df33653 + diff --git a/herd/tests/instructions/AArch64.sve/V11.litmus b/herd/tests/instructions/AArch64.sve/V11.litmus new file mode 100644 index 000000000..2e8922659 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V11.litmus @@ -0,0 +1,16 @@ +AArch64 V11 +(* Contiguous store three-word structures from three vectors (scalar index) *) +{ + int x[12]; + + 0:X0=x; +} +P0 ; + MOV Z1.S,#1 ; + MOV Z2.S,#2 ; + MOV Z3.S,#3 ; + MOV X1,#0 ; + PTRUE P0.S,VL4 ; + ST3W {Z1.S,Z2.S,Z3.S},P0,[X0,X1, LSL #2]; + +forall x={1,2,3,1,2,3,1,2,3,1,2,3} diff --git a/herd/tests/instructions/AArch64.sve/V11.litmus.expected b/herd/tests/instructions/AArch64.sve/V11.litmus.expected new file mode 100644 index 000000000..3ae3f517f --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V11.litmus.expected @@ -0,0 +1,11 @@ +Test V11 Required +States 1 +x={1,2,3,1,2,3,1,2,3,1,2,3}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (x={1,2,3,1,2,3,1,2,3,1,2,3}) +Observation V11 Always 1 0 +Hash=a3c15b9b49c3645ba9113a9354c7a46a + diff --git a/herd/tests/instructions/AArch64.sve/V12.litmus b/herd/tests/instructions/AArch64.sve/V12.litmus new file mode 100644 index 000000000..c7875b67b --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V12.litmus @@ -0,0 +1,17 @@ +AArch64 V12 +(* Contiguous store four-word structures from four vectors (scalar index) *) +{ + uint64_t x[8]; + + 0:X0=x; +} +P0 ; + MOV Z1.D,#1 ; + MOV Z2.D,#2 ; + MOV Z3.D,#3 ; + MOV Z4.D,#4 ; + MOV X1,#0 ; + PTRUE P0.D,VL2 ; + ST4D {Z1.D,Z2.D,Z3.D,Z4.D},P0,[X0,X1, LSL #3] ; + +forall x={1,2,3,4,1,2,3,4} diff --git a/herd/tests/instructions/AArch64.sve/V12.litmus.expected b/herd/tests/instructions/AArch64.sve/V12.litmus.expected new file mode 100644 index 000000000..84a3a3970 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V12.litmus.expected @@ -0,0 +1,11 @@ +Test V12 Required +States 1 +x={1,2,3,4,1,2,3,4}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (x={1,2,3,4,1,2,3,4}) +Observation V12 Always 1 0 +Hash=f059652d76c3a984d2cb9d3649b83389 + diff --git a/herd/tests/instructions/AArch64.sve/V13.litmus b/herd/tests/instructions/AArch64.sve/V13.litmus new file mode 100644 
index 000000000..df817f62b --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V13.litmus @@ -0,0 +1,10 @@ +AArch64 V13 +(* Unsigned add reduction to scalar *) +{ +} +P0 ; + MOV Z0.S,#4 ; + PTRUE P0.S,VL3 ; + UADDV D1,P0,Z0.S ; + +forall 0:V1.4S={12,0,0,0} diff --git a/herd/tests/instructions/AArch64.sve/V13.litmus.expected b/herd/tests/instructions/AArch64.sve/V13.litmus.expected new file mode 100644 index 000000000..14bf8976a --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V13.litmus.expected @@ -0,0 +1,11 @@ +Test V13 Required +States 1 +0:V1.4S={12,0,0,0}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V1.4S={12,0,0,0}) +Observation V13 Always 1 0 +Hash=5e391bba5dda1e4a4f1f0a45c2c48975 + diff --git a/herd/tests/instructions/AArch64.sve/V14.litmus b/herd/tests/instructions/AArch64.sve/V14.litmus new file mode 100644 index 000000000..8510ae7cf --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V14.litmus @@ -0,0 +1,12 @@ +AArch64 V14 +(* INDEX *) +{ +} +P0 ; + MOV W0,#2 ; + INDEX Z0.S,W0,#3 ; + INDEX Z1.S,#0,#1 ; + INDEX Z2.H,W0,W0 ; + INDEX Z3.D,#1,#2 ; + +forall 0:V0.4S={2,5,8,11} /\ 0:V1.4S={0,1,2,3} /\ 0:V2.4S={0x40002,0x80006,0xc000a,0x10000e} /\ 0:V3.4S={1,0,3,0} diff --git a/herd/tests/instructions/AArch64.sve/V14.litmus.expected b/herd/tests/instructions/AArch64.sve/V14.litmus.expected new file mode 100644 index 000000000..91975537e --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V14.litmus.expected @@ -0,0 +1,11 @@ +Test V14 Required +States 1 +0:V0.4S={2,5,8,11}; 0:V1.4S={0,1,2,3}; 0:V2.4S={262146,524294,786442,1048590}; 0:V3.4S={1,0,3,0}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V0.4S={2,5,8,11} /\ 0:V1.4S={0,1,2,3} /\ 0:V2.4S={262146,524294,786442,1048590} /\ 0:V3.4S={1,0,3,0}) +Observation V14 Always 1 0 +Hash=93b29826278f620a466dfd3446e3f014 + diff --git a/herd/tests/instructions/AArch64.sve/V15.litmus b/herd/tests/instructions/AArch64.sve/V15.litmus new file mode 100644 index 000000000..d2ac5fbf5 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V15.litmus @@ -0,0 +1,35 @@ +AArch64 V15 +(* WHILELT (predicate) *) +{ +} +P0 ; + MOV W0,#0 ; + MOV W1,#0 ; + WHILELT P0.S,W0,W1 ; (* No active elemenets *) + B.EQ L0 ; (* All Active elements were FALSE or there were no Active elements. Z == 1 *) + MOV W2,#1 ; +L0: ; + B.CS L1 ; (* The Last active element was FALSE or there were no Active elements. C == 1 *) + MOV W2,#2 ; +L1: ; + B.PL L3 ; (* The First active element was FALSE or there were no Active elements. N == 0 *) + MOV W2,#3 ; +L3: + MOV W1,#1 ; (* First element is active but not Last *) + WHILELT P1.S,W0,W1 ; + B.NE L4 ; (* An Active element was TRUE. Z == 0 *) + MOV W2,#4 ; +L4: ; + B.MI L5 ; (* The First active element was TRUE. N == 1 *) + MOV W2,#5 ; +L5: ; + B.HI L6 ; (* An Active element was TRUE, but the Last active element was FALSE. C ==1 && Z == 0 *) + MOV W2,#6 ; +L6: ; + MOV W1,#64 ; + WHILELT P2.S,W0,W1 ; (* All elements are active including Last *) + B.CC END ; (* The Last active element was TRUE. 
C == 0 *) + MOV W2,#7 ; +END: ; + +forall 0:X2=0 diff --git a/herd/tests/instructions/AArch64.sve/V15.litmus.expected b/herd/tests/instructions/AArch64.sve/V15.litmus.expected new file mode 100644 index 000000000..ed0c94278 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V15.litmus.expected @@ -0,0 +1,11 @@ +Test V15 Required +States 1 +0:X2=0; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:X2=0) +Observation V15 Always 1 0 +Hash=a5e517ea61f31faa909e892130e6b597 + diff --git a/herd/tests/instructions/AArch64.sve/V16.litmus b/herd/tests/instructions/AArch64.sve/V16.litmus new file mode 100644 index 000000000..2885de068 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V16.litmus @@ -0,0 +1,15 @@ +AArch64 V16 +{ + int x[4]={0,1,2,3}; + int y[4]={5,6,7,8}; + 0:X0=x; + 0:X1=y; +} +P0 ; +MOV W2,#0 ; +MOV W3,#4 ; +WHILELT P0.S,W2,W3 ; +LD1W {Z1.S},P0/Z,[X0] ; +LD1W {Z2.S},P0/Z,[X1,Z1.S,UXTW #2]; +forall 0:V1.4S={0,1,2,3} /\ 0:V2.4S={5,6,7,8} + diff --git a/herd/tests/instructions/AArch64.sve/V16.litmus.expected b/herd/tests/instructions/AArch64.sve/V16.litmus.expected new file mode 100644 index 000000000..60414f1b2 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V16.litmus.expected @@ -0,0 +1,11 @@ +Test V16 Required +States 1 +0:V1.4S={0,1,2,3}; 0:V2.4S={5,6,7,8}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V1.4S={0,1,2,3} /\ 0:V2.4S={5,6,7,8}) +Observation V16 Always 1 0 +Hash=97b55b740335168c5e12a073693d253a + diff --git a/herd/tests/instructions/AArch64.sve/V17.litmus b/herd/tests/instructions/AArch64.sve/V17.litmus new file mode 100644 index 000000000..8c64d0c15 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V17.litmus @@ -0,0 +1,18 @@ +AArch64 V17 +{ + int x[4]; + int y[4]; + 0:X0=x; + 0:X1=y +} +P0 ; +MOV W2,#0 ; +MOV W3,#4 ; +WHILELT P0.S,W2,W3 ; +MOV Z0.S,#4 ; +ST1W {Z0.S},P0,[X0] ; +INDEX Z1.S,W2,#1 ; +ST1W {Z1.S},P0,[X1,Z1.S,UXTW #2] ; + +forall x={4,4,4,4} /\ y={0,1,2,3} + diff --git a/herd/tests/instructions/AArch64.sve/V17.litmus.expected b/herd/tests/instructions/AArch64.sve/V17.litmus.expected new file mode 100644 index 000000000..954794888 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V17.litmus.expected @@ -0,0 +1,11 @@ +Test V17 Required +States 1 +x={4,4,4,4}; y={0,1,2,3}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (x={4,4,4,4} /\ y={0,1,2,3}) +Observation V17 Always 1 0 +Hash=f50938e525aa0cec3773dbec42193ada + diff --git a/herd/tests/instructions/AArch64.sve/V18.litmus b/herd/tests/instructions/AArch64.sve/V18.litmus new file mode 100644 index 000000000..c23b58bba --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V18.litmus @@ -0,0 +1,11 @@ +AArch64 V18 +{ + int x[8] = {1,2,1,2,1,2,1,2}; + + 0:X0=x; +} +P0 ; + PTRUE P0.S,VL4 ; + LD2W {Z0.S,Z1.S},P0/Z,[X0] ; + +forall 0:V0.4S = {1,1,1,1} /\ 0:V1.4S = {2,2,2,2} diff --git a/herd/tests/instructions/AArch64.sve/V18.litmus.expected b/herd/tests/instructions/AArch64.sve/V18.litmus.expected new file mode 100644 index 000000000..6f9dc5ed9 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V18.litmus.expected @@ -0,0 +1,11 @@ +Test V18 Required +States 1 +0:V0.4S={1,1,1,1}; 0:V1.4S={2,2,2,2}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V0.4S={1,1,1,1} /\ 0:V1.4S={2,2,2,2}) +Observation V18 Always 1 0 
+Hash=55d8e3d714ba1b661c4ad767c5d3cbee + diff --git a/herd/tests/instructions/AArch64.sve/V19.litmus b/herd/tests/instructions/AArch64.sve/V19.litmus new file mode 100644 index 000000000..9d3f90396 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V19.litmus @@ -0,0 +1,11 @@ +AArch64 V19 +{} +P0 ; +MOV Z0.S,#1 ; +MOV Z1.S,#2 ; +MOV Z2.S,#-1 ; +PTRUE P0.S,VL2 ; +MOVPRFX Z1.S,P0/Z,Z0.S ; +NEG Z1.S,P0/M,Z2.S ; + +forall 0:V1.4S={1,1,0,0} diff --git a/herd/tests/instructions/AArch64.sve/V19.litmus.expected b/herd/tests/instructions/AArch64.sve/V19.litmus.expected new file mode 100644 index 000000000..c24d4df87 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V19.litmus.expected @@ -0,0 +1,11 @@ +Test V19 Required +States 1 +0:V1.4S={1,1,0,0}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V1.4S={1,1,0,0}) +Observation V19 Always 1 0 +Hash=2a7dcf8717c679858b3d9b0347124aca + diff --git a/herd/tests/instructions/AArch64.sve/V20.litmus b/herd/tests/instructions/AArch64.sve/V20.litmus new file mode 100644 index 000000000..933e2331f --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V20.litmus @@ -0,0 +1,11 @@ +AArch64 V20 +{} +P0 ; +MOV Z0.S,#1 ; +MOV Z1.S,#2 ; +MOV Z2.S,#-1 ; +PTRUE P0.S,VL3 ; +MOVPRFX Z1.S,P0/M,Z0.S ; +NEG Z1.S,P0/M,Z2.S ; + +forall 0:V1.4S={1,1,1,2} diff --git a/herd/tests/instructions/AArch64.sve/V20.litmus.expected b/herd/tests/instructions/AArch64.sve/V20.litmus.expected new file mode 100644 index 000000000..a7631e352 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V20.litmus.expected @@ -0,0 +1,11 @@ +Test V20 Required +States 1 +0:V1.4S={1,1,1,2}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V1.4S={1,1,1,2}) +Observation V20 Always 1 0 +Hash=f5073ff7daa0f2b6acb6dea49f4fc61a + diff --git a/herd/tests/instructions/AArch64.sve/V21.litmus b/herd/tests/instructions/AArch64.sve/V21.litmus new file mode 100644 index 000000000..27443c6bb --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V21.litmus @@ -0,0 +1,9 @@ +AArch64 V21 +{} +P0 ; +MOV Z0.S,#-1 ; +MOV Z1.S,#2 ; +PTRUE P0.S,VL3 ; +NEG Z1.S,P0/M,Z0.S ; + +forall 0:V1.4S={1,1,1,2} diff --git a/herd/tests/instructions/AArch64.sve/V21.litmus.expected b/herd/tests/instructions/AArch64.sve/V21.litmus.expected new file mode 100644 index 000000000..1c1af977e --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V21.litmus.expected @@ -0,0 +1,11 @@ +Test V21 Required +States 1 +0:V1.4S={1,1,1,2}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V1.4S={1,1,1,2}) +Observation V21 Always 1 0 +Hash=b2c63565af19e50740733f57fc3bf64a + diff --git a/herd/variant.ml b/herd/variant.ml index 6809bb543..5c33a1e39 100644 --- a/herd/variant.ml +++ b/herd/variant.ml @@ -46,6 +46,7 @@ type t = | TooFar | Morello | Neon + | SVE (* Branch speculation+ cat computation of dependencies *) | Deps | Instances (* Compute dependencies on instruction instances *) @@ -133,6 +134,7 @@ let parse s = match Misc.lowercase s with | "toofar" -> Some TooFar | "morello" -> Some Morello | "neon" -> Some Neon +| "sve" -> Some SVE | "deps" -> Some Deps | "instances"|"instance" -> Some Instances | "kvm" @@ -210,6 +212,7 @@ let pp = function | TooFar -> "TooFar" | Morello -> "Morello" | Neon -> "Neon" + | SVE -> "sve" | Deps -> "Deps" | Instances -> "Instances" | VMSA -> "vmsa" diff --git a/herd/variant.mli b/herd/variant.mli index 
9a704db89..cf9ede1e9 100644 --- a/herd/variant.mli +++ b/herd/variant.mli @@ -44,6 +44,7 @@ type t = | TooFar (* Do not discard candidates with TooFar events *) | Morello | Neon + | SVE (* Branch speculation+ cat computation of dependencies *) | Deps | Instances (* Compute dependencies on instruction instances *) diff --git a/jingle/AArch64Arch_jingle.ml b/jingle/AArch64Arch_jingle.ml index 1537a7bfd..9dc51e3c1 100644 --- a/jingle/AArch64Arch_jingle.ml +++ b/jingle/AArch64Arch_jingle.ml @@ -271,6 +271,7 @@ include Arch.MakeArch(struct conv_idx idx >! fun idx -> E.Imm idx | E.Reg (v,r,e,k) -> find_cst k >> fun k -> conv_reg r >! fun r -> E.Reg (v,r,e,k) + | _ -> assert false end in @@ -664,4 +665,12 @@ include Arch.MakeArch(struct | I_MOVI_V _ | I_MOVI_S _ | I_EOR_SIMD _ | I_ADD_SIMD _ | I_ADD_SIMD_S _ -> Warn.fatal "Neon instructions are not implemented yet" + (* Scalable Vector Extension *) + | I_WHILELT _ | I_WHILELE _ | I_WHILELO _ | I_WHILELS _ + | I_UADDV _ | I_DUP_SV _ | I_ADD_SV _ | I_NEG_SV _ | I_MOVPRFX _ + | I_LD1SP _ | I_LD2SP _ | I_LD3SP _ | I_LD4SP _ + | I_ST1SP _ | I_ST2SP _ | I_ST3SP _ | I_ST4SP _ + | I_MOV_SV _ | I_PTRUE _ + | I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _ + -> Warn.fatal "SVE instructions are not implemented yet" end) diff --git a/lib/AArch64Base.ml b/lib/AArch64Base.ml index 4410b9aa1..7602fa8c1 100644 --- a/lib/AArch64Base.ml +++ b/lib/AArch64Base.ml @@ -46,6 +46,16 @@ type vec_reg = | V24 | V25 | V26 | V27 | V28 | V29 | V30 | V31 +type pred_reg = + | P0 | P1 | P2 | P3 + | P4 | P5 | P6 | P7 + | P8 | P9 | P10 | P11 + | P12 | P13 | P14 | P15 + + +type pred_mode = + | Zero | Merge + let arrange_specifier = [ (1,64),".1D" ; (1,128),".1Q" ; @@ -56,6 +66,16 @@ let arrange_specifier = (0,8),".B" ; (0,16),".H" ; (0,32),".S" ; (0,64),".D"; ] +let sve_arrange_specifier = +[ + 0,"";8,".B"; 16,".H"; 32,".S"; 64,".D"; 128,".Q"; +] + +let sve_pred_modifier = +[ + Zero,"/Z"; Merge,"/M"; +] + (********************) (* System registers *) (* (Some of...) 
*) @@ -96,6 +116,9 @@ type reg = | Tag of gpr | Vreg of (vec_reg * (int * int)) | SIMDreg of vec_reg + | Zreg of (vec_reg * int) + | Preg of (pred_reg * int) + | PMreg of (pred_reg * pred_mode) | Symbolic_reg of string | Internal of int | NZCV @@ -173,6 +196,18 @@ let vec_regs = let vregs = List.map (fun v -> Vreg (v,(4,32))) vec_regs +let zregs = List.map (fun z -> Zreg (z,32)) vec_regs + +let pred_regs = +[ + P0 ; P1; P2 ; P3; + P4 ; P5; P6 ; P7; + P8 ; P9; P10; P11; + P12; P13; P14; P15; +] + +let pregs = List.map (fun p -> Preg (p,32)) pred_regs + let linkreg = Ireg R30 let elr_el1 = SysReg ELR_EL1 let tfsr = SysReg TFSR_ELx @@ -294,6 +329,26 @@ let qvrs = V28,"Q28" ; V29,"Q29" ; V30,"Q30" ; V31, "Q31"; ] +let zvrs = +[ + V0 ,"Z0"; V1 ,"Z1"; V2 ,"Z2"; V3 ,"Z3"; + V4 ,"Z4"; V5 ,"Z5"; V6 ,"Z6"; V7 ,"Z7"; + V8 ,"Z8"; V9 ,"Z9"; V10,"Z10"; V11,"Z11"; + V12,"Z12"; V13,"Z13"; V14,"Z14"; V15,"Z15"; + V16,"Z16"; V17,"Z17"; V18,"Z18"; V19,"Z19"; + V20,"Z20"; V21,"Z21"; V22,"Z22"; V23,"Z23"; + V24,"Z24"; V25,"Z25"; V26,"Z26"; V27,"Z27"; + V28,"Z28"; V29,"Z29"; V30,"Z30"; V31,"Z31"; +] + +let pvrs = +[ + P0 ,"P0"; P1 ,"P1"; P2 ,"P2"; P3 ,"P3"; + P4 ,"P4"; P5 ,"P5"; P6 ,"P6"; P7 ,"P7"; + P8 ,"P8"; P9 ,"P9"; P10,"P10"; P11,"P11"; + P12,"P12"; P13,"P13"; P14,"P14"; P15,"P15"; +] + let simd_regs = let rs = bvrs @ hvrs @ svrs @ dvrs @ qvrs in List.map (fun (r,s) -> s,SIMDreg r) rs @@ -333,6 +388,37 @@ let parse_vreg = let parse_simd_reg = parse_some simd_regs +let parse_zreg = + let zplist = parse_list zvrs + and arplist = parse_list sve_arrange_specifier in + fun s -> + try let (g1, g2) = + ignore (Str.search_forward (Str.regexp "\\(Z[0-9]+\\)\\(\\.[B,D,Q,H,S]\\)") (Misc.uppercase s) 0); + (Str.matched_group 1 s, Str.matched_group 2 s); + in Some (Zreg (List.assoc g1 zplist, List.assoc g2 arplist)) + with Not_found -> None + +let parse_preg = + let pplist = parse_list pvrs + and splist = parse_list sve_arrange_specifier in + fun s -> + try let (g1, g2) = + ignore (Str.search_forward (Str.regexp "\\(P[0-9]+\\)\\(\\.[B,D,Q,H,S]\\)?") (Misc.uppercase s) 0); + let suffix = try Str.matched_group 2 s with Not_found -> "" in + (Str.matched_group 1 s, suffix); + in Some (Preg (List.assoc g1 pplist, List.assoc g2 splist)) + with Not_found -> None + +let parse_pmreg = + let pplist = parse_list pvrs + and mplist = parse_list sve_pred_modifier in + fun s -> + try let (g1, g2) = + ignore (Str.search_forward (Str.regexp "\\(P[0-9]+\\)\\(\\/[Z,M]\\)") (Misc.uppercase s) 0); + (Str.matched_group 1 s, Str.matched_group 2 s); + in Some (PMreg (List.assoc g1 pplist, List.assoc g2 mplist)) + with Not_found -> None + let parse_reg s = match parse_vreg s with | Some v -> Some v | None -> @@ -369,9 +455,38 @@ let pp_simd_scalar_reg rl r = match r with | SIMDreg r -> (try List.assoc r rl with Not_found -> assert false) | _ -> assert false +let pp_sve_arrange_specifier s = + try List.assoc s sve_arrange_specifier with Not_found -> assert false + +let pp_zreg r = match r with +| Zreg (r',s) -> + (try List.assoc r' zvrs with Not_found -> assert false) ^ + pp_sve_arrange_specifier s +| _ -> assert false + +let pp_preg_simple r = match r with +| Preg (r',_) +| PMreg (r',_) -> + (try List.assoc r' pvrs with Not_found -> assert false) +| _ -> assert false + +let pp_sve_pred_modifier m = + try List.assoc m sve_pred_modifier with Not_found -> assert false + +let pp_preg r = match r with +| Preg (r',s) -> + (try List.assoc r' pvrs with Not_found -> assert false) ^ + pp_sve_arrange_specifier s +| PMreg (r',m) -> + (try List.assoc r' 
pvrs with Not_found -> assert false) ^ + pp_sve_pred_modifier m +| _ -> assert false + let pp_reg r = match r with | Vreg _ -> pp_simd_vector_reg r | SIMDreg _ -> pp_simd_scalar_reg vvrs r +| Zreg _ -> pp_zreg r +| Preg _ | PMreg _ -> pp_preg r | _ -> pp_xreg r let pp_i n = match n with | 1 -> "instr:\"NOP\"" @@ -393,6 +508,9 @@ let pp_vreg v r = match v with let reg_compare r1 r2 = let strip_reg r = match r with | Vreg (v,_) -> SIMDreg v + | Zreg (v,_) -> SIMDreg v + | Preg (v,_) -> Preg (v,0) + | PMreg (v,_) -> Preg (v,0) | _ -> r in compare (strip_reg r1) (strip_reg r2) @@ -408,7 +526,6 @@ let type_reg r = | Vreg (_,(n_elt,sz)) -> Array (TestType.tr_nbits sz,n_elt) | _ -> Base "int" - (************) (* Barriers *) (************) @@ -765,6 +882,26 @@ let inverse_cond = function | LS -> HI | AL -> AL +(* Pattern specifier for scalable vector instructions *) +type pattern = + | POW2 + | VL1 + | VL2 + | VL3 + | VL4 + | VL5 + | VL6 + | VL7 + | VL8 + | VL16 + | VL32 + | VL64 + | VL128 + | VL256 + | MUL4 + | MUL3 + | ALL + type op = | ADD | ADDS | SUB | SUBS | AND | ANDS | ORR | ORN | EOR | EON | ASR | LSR | LSL | ROR | BICS | BIC @@ -847,6 +984,7 @@ module MemExt = struct (* Extensions for memory accesses *) type 'k ext = | Imm of 'k idx | Reg of variant * reg * rext * 'k + | ZReg of reg * rext * 'k let v2idx_reg v r = match v with @@ -1152,6 +1290,205 @@ type 'k kinstruction = | I_STLR of variant * reg * reg | I_STXR of variant * st_type * reg * reg * reg | I_STXP of variant * st_type * reg * reg * reg * reg
+(* Scalable Vector Extension *)
+ (* PTRUE <Pd>.<T>{, <pattern>} *)
+ | I_PTRUE of reg * pattern
+ (* WHILE{LT,LE,LO,LS} <Pd>.<T>, <R><n>, <R><m> *)
+ | I_WHILELT of reg * variant * reg * reg
+ | I_WHILELE of reg * variant * reg * reg
+ | I_WHILELO of reg * variant * reg * reg
+ | I_WHILELS of reg * variant * reg * reg
+ (* UADDV <Dd>, <Pg>, <Zn>.<T> *)
+ | I_UADDV of simd_variant * reg * reg * reg
+ (*
+ * LD1{B,H,W,D} (scalar plus scalar, single register)
+ *
+ * LD1B { <Zt>.<T> }, <Pg>/Z, [<Xn|SP>, <Xm>] for T in D,S,H,B
+ * LD1H { <Zt>.<T> }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] for T in D,S,H
+ * LD1W { <Zt>.<T> }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] for T in D,S
+ * LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3]
+ *
+ * LD1{B,H,W,D} (scalar plus immediate, single register)
+ *
+ * LD1B { <Zt>.<T> }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}] for T in D,S,H,B
+ * LD1H { <Zt>.<T> }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}] for T in D,S,H
+ * LD1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}] for T in D,S
+ * LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
+ *
+ * LD1{B,H,W,D} (scalar plus vector)
+ *
+ * LD1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]
+ * LD1B { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>]
+ * LD1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]
+ * LD1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #1]
+ * LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1]
+ * LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]
+ * LD1H { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>]
+ * LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1]
+ * LD1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]
+ * LD1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #2]
+ * LD1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2]
+ * LD1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]
+ * LD1W { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>]
+ * LD1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2]
+ * LD1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]
+ * LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #3]
+ * LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]
+ * LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #3]
+ * LD1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]
+ *)
+ | I_LD1SP of simd_variant * reg list * reg * reg * 'k MemExt.ext
+ (*
+ * LD2{B,H,W,D} (scalar plus scalar)
+ *
+ * LD2B { <Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>, <Xm>]
+ * LD2H { <Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1]
+ * LD2W { <Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2]
+ * LD2D { <Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3]
+ *
+ * LD2{B,H,W,D} (scalar plus immediate)
+ *
+ * LD2B { <Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
+ * LD2H { <Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
+ * LD2W { <Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
+ * LD2D { <Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
+ *)
+ | I_LD2SP of simd_variant * reg list * reg * reg * 'k MemExt.ext
+ (*
+ * LD3{B,H,W,D} (scalar plus scalar)
+ *
+ * LD3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>, <Xm>]
+ * LD3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1]
+ * LD3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2]
+ * LD3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3]
+ *
+ * LD3{B,H,W,D} (scalar plus immediate)
+ *
+ * LD3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
+ * LD3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
+ * LD3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
+ * LD3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
+ *)
+ | I_LD3SP of simd_variant * reg list * reg * reg * 'k MemExt.ext
+ (*
+ * LD4{B,H,W,D} (scalar plus scalar)
+ *
+ * LD4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>, <Xm>]
+ * LD4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1]
+ * LD4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2]
+ * LD4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3]
+ *
+ * LD4{B,H,W,D} (scalar plus immediate)
+ *
+ * LD4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
+ * LD4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
+ * LD4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
+ *)
+ | I_LD4SP of simd_variant * reg list * reg * reg * 'k MemExt.ext
+ (*
+ * ST1{B,H,W,D} (scalar plus scalar)
+ *
+ * ST1B { <Zt>.<T> }, <Pg>, [<Xn|SP>, <Xm>] for T in D,S,H,B
+ * ST1H { <Zt>.<T> }, <Pg>, [<Xn|SP>, <Xm>, LSL #1] for T in D,S,H
+ * ST1W { <Zt>.<T> }, <Pg>, [<Xn|SP>, <Xm>, LSL #2] for T in D,S
+ * ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3]
+ *
+ * ST1{B,H,W,D} (scalar plus immediate)
+ *
+ * ST1B { <Zt>.<T> }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}] for T in D,S,H,B
+ * ST1H { <Zt>.<T> }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}] for T in D,S,H
+ * ST1W { <Zt>.<T> }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}] for T in D,S
+ * ST1D { <Zt>.D }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+ *
+ * ST1{B,H,W,D} (scalar plus vector)
+ *
+ * ST1B { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>]
+ * ST1B { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
+ * ST1B { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D]
+ * ST1H { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #1]
+ * ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #1]
+ * ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>]
+ * ST1H { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
+ * ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #1]
+ * ST1H { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D]
+ * ST1W { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #2]
+ * ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #2]
+ * ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>]
+ * ST1W { <Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
+ * ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #2]
+ * ST1W { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D]
+ * ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #3]
+ * ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>]
+ * ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #3]
+ * ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D]
+ *)
+ | I_ST1SP of simd_variant * reg list * reg * reg * 'k MemExt.ext
+ (*
+ * ST2{B,H,W,D} (scalar plus scalar)
+ *
+ * ST2B { <Zt1>.B, <Zt2>.B }, <Pg>, [<Xn|SP>, <Xm>]
+ * ST2H { <Zt1>.H, <Zt2>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1]
+ * ST2W { <Zt1>.S, <Zt2>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2]
+ * ST2D { <Zt1>.D, <Zt2>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3]
+ *
+ * ST2{B,H,W,D} (scalar plus immediate)
+ *
+ * ST2B { <Zt1>.B, <Zt2>.B }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+ * ST2H { <Zt1>.H, <Zt2>.H }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+ * ST2W { <Zt1>.S, <Zt2>.S }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+ * ST2D { <Zt1>.D, <Zt2>.D }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+ *)
+ | I_ST2SP of simd_variant * reg list * reg * reg * 'k MemExt.ext
+ (*
+ * ST3{B,H,W,D} (scalar plus scalar)
+ *
+ * ST3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>, [<Xn|SP>, <Xm>]
+ * ST3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1]
+ * ST3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2]
+ * ST3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3]
+ *
+ * ST3{B,H,W,D} (scalar plus immediate)
+ *
+ * ST3B { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+ * ST3H { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+ * ST3W { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+ * ST3D { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+ *)
+ | I_ST3SP of simd_variant * reg list * reg * reg * 'k MemExt.ext
+ (*
+ * ST4{B,H,W,D} (scalar plus scalar)
+ *
+ * ST4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>, [<Xn|SP>, <Xm>]
+ * ST4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1]
+ * ST4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2]
+ * ST4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3]
+ *
+ * ST4{B,H,W,D} (scalar plus immediate)
+ *
+ * ST4B { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+ * ST4H { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+ * ST4W { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+ * ST4D { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+ *)
+ | I_ST4SP of simd_variant * reg list * reg * reg * 'k MemExt.ext
+ (* MOV <Zd>.<T>, #<imm>{, <shift>} *)
+ | I_MOV_SV of reg * 'k * 'k s
+ (* DUP <Zd>.<T>, <R><n> *)
+ | I_DUP_SV of reg * variant * reg
+ (* ADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> *)
+ | I_ADD_SV of reg * reg * reg
+ (* NEG <Zd>.<T>, <Pg>/M, <Zn>.<T> *)
+ | I_NEG_SV of reg * reg * reg
+ (* MOVPRFX <Zd>.<T>, <Pg>/<ZM>, <Zn>.<T>
*)
+ | I_MOVPRFX of reg * reg * reg
+ (* INDEX <Zd>.<T>, <R><n>, #<imm> *)
+ | I_INDEX_SI of reg * variant * reg * 'k
+ (* INDEX <Zd>.<T>, #<imm>, <R><m> *)
+ | I_INDEX_IS of reg * variant * 'k * reg
+ (* INDEX <Zd>.<T>, <R><n>, <R><m> *)
+ | I_INDEX_SS of reg * variant * reg * reg
+ (* INDEX <Zd>.<T>, #<imm1>, #<imm2> *)
+ | I_INDEX_II of reg * 'k * 'k (* Morello *) | I_ALIGND of reg * reg * 'k | I_ALIGNU of reg * reg * 'k @@ -1248,6 +1585,25 @@ let pp_cond = function | LE -> "LE" | AL -> "AL" +let pp_pattern = function + | POW2 -> "POW2" + | VL1 -> "VL1" + | VL2 -> "VL2" + | VL3 -> "VL3" + | VL4 -> "VL4" + | VL5 -> "VL5" + | VL6 -> "VL6" + | VL7 -> "VL7" + | VL8 -> "VL8" + | VL16 -> "VL16" + | VL32 -> "VL32" + | VL64 -> "VL64" + | VL128 -> "VL128" + | VL256 -> "VL256" + | MUL4 -> "MUL4" + | MUL3 -> "MUL3" + | ALL -> "ALL" + let pp_vsimdreg v r = match v with | VSIMD8 -> pp_simd_scalar_reg bvrs r | VSIMD16 -> pp_simd_scalar_reg hvrs r @@ -1255,6 +1611,12 @@ let pp_vsimdreg v r = match v with | VSIMD64 -> pp_simd_scalar_reg dvrs r | VSIMD128 -> pp_simd_scalar_reg qvrs r +let pp_simd_variant v = match v with +| VSIMD8 -> "B" +| VSIMD16 -> "H" +| VSIMD32 -> "W" +| VSIMD64 -> "D" +| VSIMD128 -> "Q" let pp_op = function | ADD -> "ADD" @@ -1375,7 +1737,13 @@ let do_pp_instruction m = | Reg (v,r,(UXTW|SXTW|SXTX as ext),k) when m.zerop k-> sprintf "[%s,%s,%s]" (pp_xreg ra) (pp_vreg v r) (pp_sext ext) | Reg (v,r,ext,k) -> - sprintf "[%s,%s,%s %s]" (pp_xreg ra) (pp_vreg v r) (pp_sext ext) (m.pp_k k) in + sprintf "[%s,%s,%s %s]" (pp_xreg ra) (pp_vreg v r) (pp_sext ext) (m.pp_k k) + | ZReg (r,LSL,k) when m.zerop k -> + sprintf "[%s,%s]" (pp_xreg ra) (pp_zreg r) + | ZReg (r,(UXTW|SXTW|SXTX as ext),k) when m.zerop k-> + sprintf "[%s,%s,%s]" (pp_xreg ra) (pp_zreg r) (pp_sext ext) + | ZReg (r,ext,k) -> + sprintf "[%s,%s,%s %s]" (pp_xreg ra) (pp_zreg r) (pp_sext ext) (m.pp_k k) in let pp_mem_ext memo v rt ra idx = Printf.sprintf "%s %s,%s" memo (pp_vreg v rt) (pp_addr ra idx) in @@ -1601,7 +1969,53 @@ let do_pp_instruction m = pp_simd_rrr "ADD" r1 r2 r3 | I_ADD_SIMD_S (r1,r2,r3) -> pp_simd_srrr "ADD" r1 r2 r3 - +(* Scalable Vector Extension *) + | I_PTRUE (predicate,pattern) -> + sprintf "PTRUE %s,%s" (pp_preg predicate) (pp_pattern pattern) + | I_WHILELT (p1,v,r2,r3) -> + sprintf "WHILELT %s,%s,%s" (pp_preg p1) (pp_vreg v r2) (pp_vreg v r3) + | I_WHILELE (p1,v,r2,r3) -> + sprintf "WHILELE %s,%s,%s" (pp_preg p1) (pp_vreg v r2) (pp_vreg v r3) + | I_WHILELO (p1,v,r2,r3) -> + sprintf "WHILELO %s,%s,%s" (pp_preg p1) (pp_vreg v r2) (pp_vreg v r3) + | I_WHILELS (p1,v,r2,r3) -> + sprintf "WHILELS %s,%s,%s" (pp_preg p1) (pp_vreg v r2) (pp_vreg v r3) + | I_UADDV (v,r1,r2,r3) -> + sprintf "UADDV %s,%s,%s" (pp_vsimdreg v r1) (pp_preg_simple r2) (pp_zreg r3) + | I_LD1SP (v,rs,r2,r3,idx) -> + sprintf "LD1%s {%s},%s,%s" (pp_simd_variant v) (String.concat "," (List.map pp_zreg rs)) (pp_preg r2) (pp_addr r3 idx) + | I_LD2SP (v,rs,r2,r3,idx) -> + sprintf "LD2%s {%s},%s,%s" (pp_simd_variant v) (String.concat "," (List.map pp_zreg rs)) (pp_preg r2) (pp_addr r3 idx) + | I_LD3SP (v,rs,r2,r3,idx) -> + sprintf "LD3%s {%s},%s,%s" (pp_simd_variant v) (String.concat "," (List.map pp_zreg rs)) (pp_preg r2) (pp_addr r3 idx) + | I_LD4SP (v,rs,r2,r3,idx) -> + sprintf "LD4%s {%s},%s,%s" (pp_simd_variant v) (String.concat "," (List.map pp_zreg rs)) (pp_preg r2) (pp_addr r3 idx) + | I_ST1SP (v,rs,r2,r3,idx) -> + sprintf "ST1%s {%s},%s,%s" (pp_simd_variant v) (String.concat "," (List.map pp_zreg rs)) (pp_preg_simple r2) (pp_addr r3 idx) + | I_ST2SP (v,rs,r2,r3,idx) -> + sprintf "ST2%s {%s},%s,%s" (pp_simd_variant
v) (String.concat "," (List.map pp_zreg rs)) (pp_preg_simple r2) (pp_addr r3 idx) + | I_ST3SP (v,rs,r2,r3,idx) -> + sprintf "ST3%s {%s},%s,%s" (pp_simd_variant v) (String.concat "," (List.map pp_zreg rs)) (pp_preg_simple r2) (pp_addr r3 idx) + | I_ST4SP (v,rs,r2,r3,idx) -> + sprintf "ST4%s {%s},%s,%s" (pp_simd_variant v) (String.concat "," (List.map pp_zreg rs)) (pp_preg_simple r2) (pp_addr r3 idx) + | I_MOV_SV (r,k,s) -> + sprintf "MOV %s,%s%s" (pp_zreg r) (m.pp_k k) (pp_barrel_shift "," s (m.pp_k)) + | I_DUP_SV (r1,v,r2) -> + sprintf "DUP %s,%s" (pp_zreg r1) (pp_vreg v r2) + | I_ADD_SV (r1,r2,r3) -> + sprintf "ADD %s,%s,%s" (pp_zreg r1) (pp_zreg r2) (pp_zreg r3) + | I_NEG_SV (r1,r2,r3) -> + sprintf "NEG %s,%s,%s" (pp_zreg r1) (pp_preg r2) (pp_zreg r3) + | I_MOVPRFX (r1,r2,r3) -> + sprintf "MOVPRFX %s,%s,%s" (pp_zreg r1) (pp_preg r2) (pp_zreg r3) + | I_INDEX_SI (r1,v,r2,k) -> + sprintf "INDEX %s,%s,%s" (pp_zreg r1) (pp_vreg v r2) (m.pp_k k) + | I_INDEX_IS (r1,v,k,r2) -> + sprintf "INDEX %s,%s,%s" (pp_zreg r1) (m.pp_k k) (pp_vreg v r2) + | I_INDEX_SS (r1,v,r2,r3) -> + sprintf "INDEX %s,%s,%s" (pp_zreg r1) (pp_vreg v r2) (pp_vreg v r3) + | I_INDEX_II (r1,k1,k2) -> + sprintf "INDEX %s,%s,%s" (pp_zreg r1) (m.pp_k k1) (m.pp_k k2) (* Morello *) | I_ALIGND (r1,r2,k) -> sprintf "ALIGND %s,%s,%s" (pp_creg r1) (pp_creg r2) (m.pp_k k) @@ -1828,6 +2242,9 @@ let fold_regs (f_regs,f_sregs) = | Ireg _ -> f_regs reg y_reg,y_sreg | Vreg _ -> f_regs reg y_reg,y_sreg | SIMDreg _ -> f_regs reg y_reg,y_sreg + | Zreg _ -> f_regs reg y_reg,y_sreg + | Preg _ -> f_regs reg y_reg,y_sreg + | PMreg _ -> f_regs reg y_reg,y_sreg | Symbolic_reg reg -> y_reg,f_sregs reg y_sreg | Internal _ | NZCV | ZR | SP | PC | ResAddr | Tag _ | SysReg _ @@ -1841,7 +2258,8 @@ let fold_regs (f_regs,f_sregs) = let open MemExt in match idx with | Imm _ -> c - | Reg (_,r,_,_) -> fold_reg r c in + | Reg (_,r,_,_) -> fold_reg r c + | ZReg (r,_,_) -> fold_reg r c in let fold_op_ext e c = let open OpExt in @@ -1859,6 +2277,9 @@ let fold_regs (f_regs,f_sregs) = | I_CHKSLD r | I_CHKTGD r | I_MOVI_V (r,_,_) | I_MOVI_S (_,r,_) | I_TLBI (_,r) + | I_MOV_SV (r,_,_) + | I_INDEX_II (r,_,_) + | I_PTRUE (r,_) -> fold_reg r c | I_MOV (_,r1,kr) -> fold_reg r1 (fold_kr kr c) @@ -1873,8 +2294,8 @@ let fold_regs (f_regs,f_sregs) = | I_MOV_VE (r1,_,r2,_) | I_MOV_V (r1,r2) | I_MOV_TG (_,r1,r2,_) | I_MOV_FG (r1,_,_,r2) | I_MOV_S (_,r1,r2,_) | I_FMOV_TG (_,r1,_,r2) - | I_DUP (r1,_,r2) - | I_ADDV (_,r1,r2) + | I_DUP (r1,_,r2) | I_DUP_SV (r1,_,r2) + | I_ADDV (_,r1,r2) | I_INDEX_SI (r1,_,r2,_) | I_INDEX_IS (r1,_,_,r2) | I_LDUR_SIMD (_,r1,r2,_) | I_LDAPUR_SIMD (_,r1,r2,_) | I_STUR_SIMD (_,r1,r2,_) | I_STLUR_SIMD (_,r1,r2,_) | I_LDG (r1,r2,_) | I_STZG (r1,r2,_) @@ -1899,6 +2320,15 @@ let fold_regs (f_regs,f_sregs) = | I_ST3 (rs,_,r2,kr) | I_ST3M (rs,r2,kr) | I_ST4 (rs,_,r2,kr) | I_ST4M (rs,r2,kr) -> List.fold_right fold_reg rs (fold_reg r2 (fold_kr kr c)) + | I_LD1SP (_,rs,r2,r3,idx) + | I_LD2SP (_,rs,r2,r3,idx) + | I_LD3SP (_,rs,r2,r3,idx) + | I_LD4SP (_,rs,r2,r3,idx) + | I_ST1SP (_,rs,r2,r3,idx) + | I_ST2SP (_,rs,r2,r3,idx) + | I_ST3SP (_,rs,r2,r3,idx) + | I_ST4SP (_,rs,r2,r3,idx) + -> List.fold_right fold_reg rs (fold_reg r2 ( fold_reg r3 (fold_idx idx c))) | I_CSEL (_,r1,r2,r3,_,_) | I_STXR (_,_,r1,r2,r3) | I_STXRBH (_,_,r1,r2,r3) | I_BUILD (r1,r2,r3) | I_CPYTYPE (r1,r2,r3) | I_CPYVALUE (r1,r2,r3) @@ -1908,6 +2338,9 @@ let fold_regs (f_regs,f_sregs) = | I_STP_SIMD (_,_,r1,r2,r3,_) | I_EOR_SIMD (r1,r2,r3) | I_ADD_SIMD (r1,r2,r3) | I_ADD_SIMD_S (r1,r2,r3) | I_LDXP 
(_,_,r1,r2,r3) + | I_WHILELT (r1,_,r2,r3) | I_WHILELE (r1,_,r2,r3) | I_WHILELO (r1,_,r2,r3) | I_WHILELS (r1,_,r2,r3) + | I_INDEX_SS(r1,_,r2,r3) + | I_UADDV (_,r1,r2,r3) | I_ADD_SV (r1,r2,r3) | I_NEG_SV (r1,r2,r3) | I_MOVPRFX (r1,r2,r3) -> fold_reg r1 (fold_reg r2 (fold_reg r3 c)) | I_LDP (_,_,r1,r2,r3,_) | I_LDPSW (r1,r2,r3,_) @@ -1934,6 +2367,9 @@ let map_regs f_reg f_symb = | Ireg _ -> f_reg reg | Vreg _ -> f_reg reg | SIMDreg _ -> f_reg reg + | Zreg _ -> f_reg reg + | Preg _ -> f_reg reg + | PMreg _ -> f_reg reg | Symbolic_reg reg -> f_symb reg | Internal _ | ZR | SP| PC | NZCV | ResAddr | Tag _ | SysReg _ @@ -1947,7 +2383,8 @@ let map_regs f_reg f_symb = let open MemExt in match idx with | Imm _ -> idx - | Reg (v,r,ext,k) -> Reg (v,map_reg r,ext,k) in + | Reg (v,r,ext,k) -> Reg (v,map_reg r,ext,k) + | ZReg (r,ext,k) -> ZReg (map_reg r,ext,k) in let map_op_ext e = let open OpExt in @@ -2103,7 +2540,54 @@ let map_regs f_reg f_symb = | I_ADD_SIMD (r1,r2,r3) -> I_ADD_SIMD (map_reg r1,map_reg r2,map_reg r3) | I_ADD_SIMD_S (r1,r2,r3) -> - I_ADD_SIMD_S (map_reg r1,map_reg r2,map_reg r3) + I_ADD_SIMD_S (map_reg r1,map_reg r2,map_reg r3) +(* Scalable Vector Extension *) + | I_PTRUE (predicate,pattern) -> + I_PTRUE (map_reg predicate,pattern) + | I_WHILELT (r1,v,r2,r3) -> + I_WHILELT (map_reg r1,v,map_reg r2,map_reg r3) + | I_WHILELE (r1,v,r2,r3) -> + I_WHILELE (map_reg r1,v,map_reg r2,map_reg r3) + | I_WHILELO (r1,v,r2,r3) -> + I_WHILELO (map_reg r1,v,map_reg r2,map_reg r3) + | I_WHILELS (r1,v,r2,r3) -> + I_WHILELS (map_reg r1,v,map_reg r2,map_reg r3) + | I_UADDV (v,r1,r2,r3) -> + I_UADDV (v,map_reg r1,map_reg r2,map_reg r3) + | I_LD1SP (v,rs,r2,r3,idx) -> + I_LD1SP (v,List.map map_reg rs,map_reg r2,map_reg r3,map_idx idx) + | I_LD2SP (v,rs,r2,r3,idx) -> + I_LD2SP (v,List.map map_reg rs,map_reg r2,map_reg r3,map_idx idx) + | I_LD3SP (v,rs,r2,r3,idx) -> + I_LD3SP (v,List.map map_reg rs,map_reg r2,map_reg r3,map_idx idx) + | I_LD4SP (v,rs,r2,r3,idx) -> + I_LD4SP (v,List.map map_reg rs,map_reg r2,map_reg r3,map_idx idx) + | I_ST1SP (v,rs,r2,r3,idx) -> + I_ST1SP (v,List.map map_reg rs,map_reg r2,map_reg r3,map_idx idx) + | I_ST2SP (v,rs,r2,r3,idx) -> + I_ST2SP (v,List.map map_reg rs,map_reg r2,map_reg r3,map_idx idx) + | I_ST3SP (v,rs,r2,r3,idx) -> + I_ST3SP (v,List.map map_reg rs,map_reg r2,map_reg r3,map_idx idx) + | I_ST4SP (v,rs,r2,r3,idx) -> + I_ST4SP (v,List.map map_reg rs,map_reg r2,map_reg r3,map_idx idx) + | I_MOV_SV (r,k,os) -> + I_MOV_SV (map_reg r,k,os) + | I_DUP_SV (r1,v,r2) -> + I_DUP_SV (map_reg r1,v,map_reg r2) + | I_ADD_SV (r1,r2,r3) -> + I_ADD_SV (map_reg r1,map_reg r2,map_reg r3) + | I_NEG_SV (r1,r2,r3) -> + I_NEG_SV (map_reg r1,map_reg r2,map_reg r3) + | I_MOVPRFX (r1,r2,r3) -> + I_MOVPRFX (map_reg r1,map_reg r2,map_reg r3) + | I_INDEX_SI (r1,v,r2,k) -> + I_INDEX_SI (map_reg r1,v,map_reg r2,k) + | I_INDEX_IS (r1,v,k,r2) -> + I_INDEX_IS (map_reg r1,v,k,map_reg r2) + | I_INDEX_SS (r1,v,r2,r3) -> + I_INDEX_SS (map_reg r1,v,map_reg r2,map_reg r3) + | I_INDEX_II (r1,k1,k2) -> + I_INDEX_II (map_reg r1,k1,k2) (* Morello *) | I_ALIGNU (r1,r2,k) -> I_ALIGNU(map_reg r1,map_reg r2,k) @@ -2317,6 +2801,12 @@ let get_next = | I_MOVI_V _ | I_MOVI_S _ | I_EOR_SIMD _ | I_ADD_SIMD _ | I_ADD_SIMD_S _ | I_LDXP _|I_STXP _|I_UDF _ + | I_WHILELT _ | I_WHILELE _ | I_WHILELO _ | I_WHILELS _ + | I_UADDV _ | I_DUP_SV _ | I_PTRUE _ + | I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _ + | I_LD1SP _ | I_LD2SP _ | I_LD3SP _ | I_LD4SP _ + | I_ST1SP _ | I_ST2SP _ | I_ST3SP _ | I_ST4SP _ + | I_MOV_SV _ 
| I_ADD_SV _ | I_NEG_SV _ | I_MOVPRFX _ -> [Label.Next;] (* Check instruction validity, beyond parsing *) @@ -2490,6 +2980,7 @@ module PseudoI = struct match idx with | Imm k -> Imm (idx_tr k) | Reg (v,r,ext,k) -> Reg (v,r,ext,k_tr k) + | ZReg (r,ext,k) -> ZReg (r,ext,k_tr k) let op_ext_tr e = let open OpExt in @@ -2549,6 +3040,10 @@ module PseudoI = struct | I_MOV_S _ | I_LDXP _| I_STXP _ | I_MOPL _ + | I_WHILELT _ | I_WHILELE _ | I_WHILELO _| I_WHILELS _ + | I_UADDV _ | I_DUP_SV _ | I_PTRUE _ + | I_ADD_SV _ | I_INDEX_SS _ + | I_NEG_SV _ | I_MOVPRFX _ as keep -> keep | I_LDR (v,r1,r2,idx) -> I_LDR (v,r1,r2,ext_tr idx) | I_LDRSW (r1,r2,idx) -> I_LDRSW (r1,r2,ext_tr idx) @@ -2617,6 +3112,18 @@ module PseudoI = struct | I_ADD_SIMD (r1,r2,r3) -> I_ADD_SIMD (r1,r2,r3) | I_ADD_SIMD_S (r1,r2,r3) -> I_ADD_SIMD_S (r1,r2,r3) | I_UDF k -> I_UDF (k_tr k) + | I_MOV_SV (r,k,s) -> I_MOV_SV (r,k_tr k,ap_shift k_tr s) + | I_LD1SP (v,r1,r2,r3,idx) -> I_LD1SP (v,r1,r2,r3,ext_tr idx) + | I_LD2SP (v,r1,r2,r3,idx) -> I_LD2SP (v,r1,r2,r3,ext_tr idx) + | I_LD3SP (v,r1,r2,r3,idx) -> I_LD3SP (v,r1,r2,r3,ext_tr idx) + | I_LD4SP (v,r1,r2,r3,idx) -> I_LD4SP (v,r1,r2,r3,ext_tr idx) + | I_ST1SP (v,r1,r2,r3,idx) -> I_ST1SP (v,r1,r2,r3,ext_tr idx) + | I_ST2SP (v,r1,r2,r3,idx) -> I_ST2SP (v,r1,r2,r3,ext_tr idx) + | I_ST3SP (v,r1,r2,r3,idx) -> I_ST3SP (v,r1,r2,r3,ext_tr idx) + | I_ST4SP (v,r1,r2,r3,idx) -> I_ST4SP (v,r1,r2,r3,ext_tr idx) + | I_INDEX_SI (r1,v,r2,k) -> I_INDEX_SI (r1,v,r2,k_tr k) + | I_INDEX_IS (r1,v,k,r2) -> I_INDEX_IS (r1,v,k_tr k,r2) + | I_INDEX_II (r1,k1,k2) -> I_INDEX_II (r1,k_tr k1,k_tr k2) let get_simd_rpt_selem ins rs = match ins with | I_LD1M _ @@ -2705,6 +3212,12 @@ module PseudoI = struct | I_MOVI_V _ | I_MOVI_S _ | I_EOR_SIMD _ | I_ADD_SIMD _ | I_ADD_SIMD_S _ | I_UDF _ + | I_WHILELT _ | I_WHILELE _ | I_WHILELO _ | I_WHILELS _ + | I_PTRUE _ + | I_ADD_SV _ | I_UADDV _ | I_DUP_SV _ + | I_NEG_SV _ | I_MOVPRFX _ + | I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _ + | I_MOV_SV _ -> 0 | I_LD1M (rs, _, _) | I_LD2M (rs, _, _) @@ -2717,6 +3230,9 @@ module PseudoI = struct -> let (rpt, selem) = (get_simd_rpt_selem ins rs) in let n = get_simd_elements rs in rpt * selem * n + | I_LD1SP (_,rs,_,_,_) | I_LD2SP (_,rs,_,_,_) | I_LD3SP (_,rs,_,_,_) | I_LD4SP (_,rs,_,_,_) + | I_ST1SP (_,rs,_,_,_) | I_ST2SP (_,rs,_,_,_) | I_ST3SP (_,rs,_,_,_) | I_ST4SP (_,rs,_,_,_) + -> 16 * List.length rs (* upper bound *) let size_of_ins _ = 4 diff --git a/lib/AArch64Lexer.mll b/lib/AArch64Lexer.mll index 45945669c..033d972d4 100644 --- a/lib/AArch64Lexer.mll +++ b/lib/AArch64Lexer.mll @@ -60,20 +60,20 @@ match name with | "hi" | "HI" -> TOK_HI | "ls" | "LS" -> TOK_LS | "al" | "AL" -> TOK_AL -| "b.eq" | "B.EQ" -> BEQ -| "b.ne" | "B.NE" -> BNE -| "b.ge" | "B.GE" -> BGE +| "b.eq" | "B.EQ" | "b.none" | "B.NONE" -> BEQ +| "b.ne" | "B.NE" | "b.any" | "B.ANY" -> BNE +| "b.ge" | "B.GE" | "b.tcont" | "B.TCONT" -> BGE | "b.gt" | "B.GT" -> BGT | "b.le" | "B.LE" -> BLE -| "b.lt" | "B.LT" -> BLT -| "b.cs" | "B.CS" -> BCS -| "b.cc" | "B.CC" -> BCC -| "b.mi" | "B.MI" -> BMI -| "b.pl" | "B.PL" -> BPL +| "b.lt" | "B.LT" | "b.tstop" | "B.TSTOP" -> BLT +| "b.cs" | "B.CS" | "b.nlast" | "B.NLAST" -> BCS +| "b.cc" | "B.CC" | "b.last" | "B.LAST" -> BCC +| "b.mi" | "B.MI" | "b.first" | "B.FIRST" -> BMI +| "b.pl" | "B.PL" | "b.nfirst" | "B.NFIRST" -> BPL | "b.vs" | "B.VS" -> BVS | "b.vc" | "B.VC" -> BVC | "b.hi" | "B.HI" -> BHI -| "b.ls" | "B.LS" -> BLS +| "b.ls" | "B.LS" | "b.plast" | "B.PLAST" -> BLS | "b.al" | "B.AL" -> BAL | "cbz" | "CBZ" 
-> CBZ | "cbnz" | "CBNZ" -> CBNZ @@ -145,6 +145,66 @@ match name with | "movi" | "MOVI" -> MOVI | "mvn" | "MVN" -> MVN | "fmov" | "FMOV" -> FMOV +(* Scalabel Vector Extension *) +| "whilelt" | "WHILELT" -> WHILELT +| "whilele" | "WHILELE" -> WHILELE +| "whilelo" | "WHILELO" -> WHILELO +| "whilels" | "WHILELS" -> WHILELS +| "uaddv" | "UADDV" -> UADDV +| "ld1b" | "LD1B" -> LD1B +| "ld1h" | "LD1H" -> LD1H +| "ld1w" | "LD1W" -> LD1W +| "ld1d" | "LD1D" -> LD1D +| "ld2b" | "LD2B" -> LD2B +| "ld2h" | "LD2H" -> LD2H +| "ld2w" | "LD2W" -> LD2W +| "ld2d" | "LD2D" -> LD2D +| "ld3b" | "LD3B" -> LD3B +| "ld3h" | "LD3H" -> LD3H +| "ld3w" | "LD3W" -> LD3W +| "ld3d" | "LD3D" -> LD3D +| "ld4b" | "LD4B" -> LD4B +| "ld4h" | "LD4H" -> LD4H +| "ld4w" | "LD4W" -> LD4W +| "ld4d" | "LD4D" -> LD4D +| "st1b" | "ST1B" -> ST1B +| "st1h" | "ST1H" -> ST1H +| "st1w" | "ST1W" -> ST1W +| "st1d" | "ST1D" -> ST1D +| "st2b" | "ST2B" -> ST2B +| "st2h" | "ST2H" -> ST2H +| "st2w" | "ST2W" -> ST2W +| "st2d" | "ST2D" -> ST2D +| "st3b" | "ST3B" -> ST3B +| "st3h" | "ST3H" -> ST3H +| "st3w" | "ST3W" -> ST3W +| "st3d" | "ST3D" -> ST3D +| "st4b" | "ST4B" -> ST4B +| "st4h" | "ST4H" -> ST4H +| "st4w" | "ST4W" -> ST4W +| "st4d" | "ST4D" -> ST4D +| "index" | "INDEX" -> TOK_INDEX +| "mul" | "MUL" -> TOK_MUL +| "vl" | "VL" -> TOK_VL +| "ptrue" | "PTRUE" -> PTRUE +| "pow2" | "POW2" -> TOK_POW2 +| "vl1" | "VL1" -> TOK_VL1 +| "vl2" | "VL2" -> TOK_VL2 +| "vl3" | "VL3" -> TOK_VL3 +| "vl4" | "VL4" -> TOK_VL4 +| "vl5" | "VL5" -> TOK_VL5 +| "vl6" | "VL6" -> TOK_VL6 +| "vl7" | "VL7" -> TOK_VL7 +| "vl8" | "VL8" -> TOK_VL8 +| "vl16" | "VL16" -> TOK_VL16 +| "vl32" | "VL32" -> TOK_VL32 +| "vl64" | "VL64" -> TOK_VL64 +| "vl128" | "VL128" -> TOK_VL128 +| "vl256" | "VL256" -> TOK_VL256 +| "mul4" | "MUL4" -> TOK_MUL4 +| "mul3" | "MUL3" -> TOK_MUL3 +| "all" | "ALL" -> TOK_ALL +| "movprfx" | "MOVPRFX" -> MOVPRFX (* Compare and swap *) | "cas"|"CAS" -> CAS | "casa"|"CASA" -> CASA @@ -577,9 +637,26 @@ match name with | _ -> assert false end | None -> - begin match A.parse_sysreg name with - | Some r -> SYSREG r - | None -> NAME name + begin match A.parse_zreg name with + | Some r -> ARCH_ZREG r + | None -> + begin match A.parse_pmreg name with + | Some r -> + begin match r with + | A.PMreg (_,A.Zero) -> ARCH_PMREG_Z r + | A.PMreg (_,A.Merge) -> ARCH_PMREG_M r + | _ -> assert false + end + | None -> + begin match A.parse_preg name with + | Some r -> ARCH_PREG r + | None -> + begin match A.parse_sysreg name with + | Some r -> SYSREG r + | None -> NAME name + end + end + end end end end diff --git a/lib/AArch64Parser.mly b/lib/AArch64Parser.mly index 6bc180759..07ee026ba 100644 --- a/lib/AArch64Parser.mly +++ b/lib/AArch64Parser.mly @@ -17,6 +17,14 @@ open AArch64Base +let preg_of_proc p = + match p with + | num -> ( + match parse_preg ("P" ^ string_of_int(num)) with + | Some r -> r + | _ -> raise Parsing.Parse_error + ) + (* No constant third argument for those *) let check_op3 op e = match op,e with @@ -38,6 +46,10 @@ let check_op3 op e = %token ARCH_SREG %token ARCH_DREG %token ARCH_QREG +%token ARCH_ZREG +%token ARCH_PREG +%token ARCH_PMREG_Z +%token ARCH_PMREG_M %token INDEX %token NUM %token NAME @@ -95,6 +107,14 @@ let check_op3 op e = %token LDOPBH %token STOPBH %token UDF +/* Scalable Vector Extention */ +%token WHILELT WHILELE WHILELO WHILELS UADDV TOK_INDEX TOK_MUL TOK_VL PTRUE +%token TOK_ALL TOK_POW2 TOK_MUL3 TOK_MUL4 +%token TOK_VL1 TOK_VL2 TOK_VL3 TOK_VL4 TOK_VL5 TOK_VL6 TOK_VL7 TOK_VL8 +%token TOK_VL16 TOK_VL32 TOK_VL64 TOK_VL128 TOK_VL256 +%token 
MOVPRFX +%token LD1B LD1H LD1W LD1D LD2B LD2H LD2W LD2D LD3B LD3H LD3W LD3D LD4B LD4H LD4W LD4D +%token ST1B ST1H ST1W ST1D ST2B ST2H ST2W ST2D ST3B ST3H ST3W ST3D ST4B ST4H ST4W ST4D /* /* %token LDUMAX LDUMAXA LDUMAXL LDUMAXAL LDUMAXH LDUMAXAH LDUMAXLH LDUMAXALH @@ -243,6 +263,58 @@ bhsdregs: | sreg { VSIMD32,$1 } | dreg { VSIMD64,$1 } +preg: +| PROC { preg_of_proc $1 } + +preg_sz: +| ARCH_PREG { $1 } + +pmreg_z: +| ARCH_PMREG_Z { $1 } + +pmreg_m: +| ARCH_PMREG_M { $1 } + +pmreg: +| ARCH_PMREG_Z { $1 } +| ARCH_PMREG_M { $1 } + + +zreg: +| ARCH_ZREG { $1 } + +zregs1: +| LCRL zreg RCRL { [$2] } + +zregs2: +| LCRL zreg COMMA zreg RCRL { [$2;$4] } + +zregs3: +| LCRL zreg COMMA zreg COMMA zreg RCRL { [$2;$4;$6] } + +zregs4: +| LCRL zreg COMMA zreg COMMA zreg COMMA zreg RCRL { [$2;$4;$6;$8] } + +pattern: +| { ALL } +| COMMA TOK_ALL { ALL } +| COMMA TOK_POW2 { POW2 } +| COMMA TOK_VL1 { VL1 } +| COMMA TOK_VL2 { VL2 } +| COMMA TOK_VL3 { VL3 } +| COMMA TOK_VL4 { VL4 } +| COMMA TOK_VL5 { VL5 } +| COMMA TOK_VL6 { VL6 } +| COMMA TOK_VL7 { VL7 } +| COMMA TOK_VL8 { VL8 } +| COMMA TOK_VL16 { VL16 } +| COMMA TOK_VL32 { VL32 } +| COMMA TOK_VL64 { VL64 } +| COMMA TOK_VL128 { VL128 } +| COMMA TOK_VL256 { VL256 } +| COMMA TOK_MUL3 { MUL3 } +| COMMA TOK_MUL4 { MUL4 } + k: | NUM { MetaConst.Int $1 } | META { MetaConst.Meta $1 } @@ -339,34 +411,48 @@ mem_idx: { $2,(MetaConst.zero,Idx) } | LBRK cxreg COMMA k RBRK { $2,($4,Idx) } +| LBRK cxreg COMMA k COMMA TOK_MUL TOK_VL RBRK + { $2,($4,Idx) } | LBRK cxreg RBRK COMMA k { $2,($5,PostIdx) } | LBRK cxreg COMMA k RBRK BANG { $2,($4,PreIdx) } +mreg: +| xreg { V64,$1 } +| wreg { V32,$1 } +(* (ab)use V128 to indicate Z regester *) +| zreg { V128,$1 } + mem_ea: | mem_idx { let r,idx = $1 in r,MemExt.Imm idx } -| LBRK cxreg COMMA wxreg RBRK +| LBRK cxreg COMMA mreg RBRK { let open MemExt in let v,r = $4 in - $2,Reg (v,r,LSL,MetaConst.zero) + match v with + | V32|V64 -> $2,Reg (v,r,LSL,MetaConst.zero) + | V128 -> $2,ZReg (r,LSL,MetaConst.zero) } -| LBRK cxreg COMMA wxreg COMMA mem_sext RBRK +| LBRK cxreg COMMA mreg COMMA mem_sext RBRK { let open MemExt in let v,r = $4 in - $2,Reg (v,r,$6,MetaConst.zero) + match v with + | V32|V64 -> $2,Reg (v,r,$6,MetaConst.zero) + | V128 -> $2,ZReg (r,$6,MetaConst.zero) } -| LBRK cxreg COMMA wxreg COMMA mem_sext k RBRK +| LBRK cxreg COMMA mreg COMMA mem_sext k RBRK { let open MemExt in let v,r = $4 in - $2,Reg (v,r,$6,$7) + match v with + | V32|V64 -> $2,Reg (v,r,$6,$7) + | V128 -> $2,ZReg (r,$6,$7) } shift: @@ -723,6 +809,401 @@ instr: { I_ADD_SIMD ($2,$4,$6) } | TOK_ADD dreg COMMA dreg COMMA dreg { I_ADD_SIMD_S ($2,$4,$6)} + /* Scalabel Vector extension */ +| WHILELT preg_sz COMMA xreg COMMA xreg + { I_WHILELT ($2,V64,$4,$6)} +| WHILELT preg_sz COMMA wreg COMMA wreg + { I_WHILELT ($2,V32,$4,$6)} +| WHILELE preg_sz COMMA xreg COMMA xreg + { I_WHILELE ($2,V64,$4,$6)} +| WHILELE preg_sz COMMA wreg COMMA wreg + { I_WHILELE ($2,V32,$4,$6)} +| WHILELO preg_sz COMMA xreg COMMA xreg + { I_WHILELO ($2,V64,$4,$6)} +| WHILELO preg_sz COMMA wreg COMMA wreg + { I_WHILELO ($2,V32,$4,$6)} +| WHILELS preg_sz COMMA xreg COMMA xreg + { I_WHILELS ($2,V64,$4,$6)} +| WHILELS preg_sz COMMA wreg COMMA wreg + { I_WHILELS ($2,V32,$4,$6)} +| UADDV dreg COMMA preg COMMA zreg + { I_UADDV (VSIMD64,$2,$4,$6)} +| LD1B zregs1 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 0) + | ZReg (_,LSL,MetaConst.Int 0) + | ZReg (_,UXTW,MetaConst.Int 0) + | ZReg (_,SXTW,MetaConst.Int 0) + 
-> I_LD1SP (VSIMD8,$2,$4,ra,ext) + | _ -> assert false + } +| LD1H zregs1 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 1) + | ZReg (_,LSL,MetaConst.Int 0) + | ZReg (_,UXTW,MetaConst.Int 0) + | ZReg (_,SXTW,MetaConst.Int 0) + | ZReg (_,LSL,MetaConst.Int 1) + | ZReg (_,UXTW,MetaConst.Int 1) + | ZReg (_,SXTW,MetaConst.Int 1) + -> I_LD1SP (VSIMD16,$2,$4,ra,ext) + | _ -> assert false + } +| LD1W zregs1 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 2) + | ZReg (_,LSL,MetaConst.Int 0) + | ZReg (_,UXTW,MetaConst.Int 0) + | ZReg (_,SXTW,MetaConst.Int 0) + | ZReg (_,LSL,MetaConst.Int 2) + | ZReg (_,UXTW,MetaConst.Int 2) + | ZReg (_,SXTW,MetaConst.Int 2) + -> I_LD1SP (VSIMD32,$2,$4,ra,ext) + | _ -> assert false + } +| LD1D zregs1 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 3) + | ZReg (_,LSL,MetaConst.Int 0) + | ZReg (_,UXTW,MetaConst.Int 0) + | ZReg (_,SXTW,MetaConst.Int 0) + | ZReg (_,LSL,MetaConst.Int 3) + | ZReg (_,UXTW,MetaConst.Int 3) + | ZReg (_,SXTW,MetaConst.Int 3) + -> I_LD1SP (VSIMD64,$2,$4,ra,ext) + | _ -> assert false + } +| LD2B zregs2 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 0) + -> I_LD2SP (VSIMD8,$2,$4,ra,ext) + | _ -> assert false + } +| LD2H zregs2 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 1) + -> I_LD2SP (VSIMD16,$2,$4,ra,ext) + | _ -> assert false + } +| LD2W zregs2 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 2) -> I_LD2SP (VSIMD32,$2,$4,ra,ext) + | _ -> assert false + } +| LD2D zregs2 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 3) + -> I_LD2SP (VSIMD64,$2,$4,ra,ext) + | _ -> assert false + } +| LD3B zregs3 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 0) + -> I_LD3SP (VSIMD8,$2,$4,ra,ext) + | _ -> assert false + } +| LD3H zregs3 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 1) + -> I_LD3SP (VSIMD16,$2,$4,ra,ext) + | _ -> assert false + } +| LD3W zregs3 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 2) + -> I_LD3SP (VSIMD32,$2,$4,ra,ext) + | _ -> assert false + } +| LD3D zregs3 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 3) + -> I_LD3SP (VSIMD64,$2,$4,ra,ext) + | _ -> assert false + } +| LD4B zregs4 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 0) + -> I_LD4SP (VSIMD8,$2,$4,ra,ext) + | _ -> assert false + } +| LD4H zregs4 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 1) + -> I_LD4SP (VSIMD16,$2,$4,ra,ext) + | _ -> assert false + } +| LD4W zregs4 COMMA pmreg_z COMMA mem_ea + { 
let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 2) + -> I_LD4SP (VSIMD32,$2,$4,ra,ext) + | _ -> assert false + } +| LD4D zregs4 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 3) + -> I_LD4SP (VSIMD64,$2,$4,ra,ext) + | _ -> assert false + } +| ST1B zregs1 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 0) + | ZReg (_,LSL,MetaConst.Int 0) + | ZReg (_,UXTW,MetaConst.Int 0) + | ZReg (_,SXTW,MetaConst.Int 0) + -> I_ST1SP (VSIMD8,$2,$4,ra,ext) + | _ -> assert false + } +| ST1H zregs1 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 1) + | ZReg (_,LSL,MetaConst.Int 0) + | ZReg (_,UXTW,MetaConst.Int 0) + | ZReg (_,SXTW,MetaConst.Int 0) + | ZReg (_,LSL,MetaConst.Int 1) + | ZReg (_,UXTW,MetaConst.Int 1) + | ZReg (_,SXTW,MetaConst.Int 1) + -> I_ST1SP (VSIMD16,$2,$4,ra,ext) + | _ -> assert false + } +| ST1W zregs1 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 2) + | ZReg (_,LSL,MetaConst.Int 0) + | ZReg (_,UXTW,MetaConst.Int 0) + | ZReg (_,SXTW,MetaConst.Int 0) + | ZReg (_,LSL,MetaConst.Int 2) + | ZReg (_,UXTW,MetaConst.Int 2) + | ZReg (_,SXTW,MetaConst.Int 2) + -> I_ST1SP (VSIMD32,$2,$4,ra,ext) + | _ -> assert false + } +| ST1D zregs1 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 3) + | ZReg (_,LSL,MetaConst.Int 0) + | ZReg (_,UXTW,MetaConst.Int 0) + | ZReg (_,SXTW,MetaConst.Int 0) + | ZReg (_,LSL,MetaConst.Int 3) + | ZReg (_,UXTW,MetaConst.Int 3) + | ZReg (_,SXTW,MetaConst.Int 3) + -> I_ST1SP (VSIMD64,$2,$4,ra,ext) + | _ -> assert false + } +| ST2B zregs2 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 0) + -> I_ST2SP (VSIMD8,$2,$4,ra,ext) + | _ -> assert false + } +| ST2H zregs2 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 1) + -> I_ST2SP (VSIMD16,$2,$4,ra,ext) + | _ -> assert false + } +| ST2W zregs2 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 2) + -> I_ST2SP (VSIMD32,$2,$4,ra,ext) + | _ -> assert false + } +| ST2D zregs2 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 3) + -> I_ST2SP (VSIMD64,$2,$4,ra,ext) + | _ -> assert false + } +| ST3B zregs3 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 0) + -> I_ST3SP (VSIMD8,$2,$4,ra,ext) + | _ -> assert false + } +| ST3H zregs3 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 1) + -> I_ST3SP (VSIMD16,$2,$4,ra,ext) + | _ -> assert false + } +| ST3W zregs3 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 2) + -> I_ST3SP (VSIMD32,$2,$4,ra,ext) + | _ -> assert false + } +| ST3D zregs3 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 
in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 3) + -> I_ST3SP (VSIMD64,$2,$4,ra,ext) + | _ -> assert false + } +| ST4B zregs4 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 0) + -> I_ST4SP (VSIMD8,$2,$4,ra,ext) + | _ -> assert false + } +| ST4H zregs4 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 1) + -> I_ST4SP (VSIMD16,$2,$4,ra,ext) + | _ -> assert false + } +| ST4W zregs4 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 2) + -> I_ST4SP (VSIMD32,$2,$4,ra,ext) + | _ -> assert false + } +| ST4D zregs4 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 3) + -> I_ST4SP (VSIMD64,$2,$4,ra,ext) + | _ -> assert false + } +| MOV zreg COMMA k + { I_MOV_SV ($2, $4, S_NOEXT) } +| MOV zreg COMMA k COMMA shift + { I_MOV_SV ($2, $4, $6) } +| DUP zreg COMMA wxreg + { let v,r = $4 in + I_DUP_SV ($2 , v, r) } +| TOK_ADD zreg COMMA zreg COMMA zreg + { I_ADD_SV ($2,$4,$6) } +| TOK_INDEX zreg COMMA xreg COMMA k + { match $2 with + | Zreg(_,64) -> I_INDEX_SI ($2,V64,$4,$6) + | _ -> assert false } +| TOK_INDEX zreg COMMA wreg COMMA k + { match $2 with + | Zreg(_,8) + | Zreg(_,16) + | Zreg(_,32) -> I_INDEX_SI ($2,V32,$4,$6) + | _ -> assert false } +| TOK_INDEX zreg COMMA k COMMA xreg + { match $6 with + | Zreg(_,64) -> I_INDEX_IS ($2,V64,$4,$6) + | _ -> assert false } +| TOK_INDEX zreg COMMA k COMMA wreg + { match $6 with + | Zreg(_,8) + | Zreg(_,16) + | Zreg(_,32) -> I_INDEX_IS ($2,V32,$4,$6) + | _ -> assert false } +| TOK_INDEX zreg COMMA xreg COMMA xreg + { match $2 with + | Zreg(_,64) -> I_INDEX_SS ($2,V64,$4,$6) + | _ -> assert false } +| TOK_INDEX zreg COMMA wreg COMMA wreg + { match $2 with + | Zreg(_,8) + | Zreg(_,16) + | Zreg(_,32) -> I_INDEX_SS ($2,V32,$4,$6) + | _ -> assert false } +| TOK_INDEX zreg COMMA k COMMA k + { I_INDEX_II ($2,$4,$6) } +| PTRUE preg_sz pattern + { I_PTRUE ($2,$3) } +| TOK_NEG zreg COMMA pmreg_m COMMA zreg + { I_NEG_SV ($2,$4,$6) } +| MOVPRFX zreg COMMA pmreg COMMA zreg + { I_MOVPRFX ($2,$4,$6) } /* Compare and swap */ | CAS wreg COMMA wreg COMMA LBRK cxreg zeroopt RBRK { I_CAS (V32,RMW_P,$2,$4,$7) } diff --git a/lib/CType.ml b/lib/CType.ml index 6f57fe8fb..d9e9f5a51 100644 --- a/lib/CType.ml +++ b/lib/CType.ml @@ -32,6 +32,8 @@ let voidstar = Pointer void let word = Base "int" let quad = Base "int64_t" let int32x4_t = Base "int32x4_t" +let svbool_t = Base "svbool_t" +let svint32_t = Base "svint32_t" let pteval_t = Base "pteval_t" let pte = Pointer pteval_t let ins_t = Base "ins_t" diff --git a/lib/CType.mli b/lib/CType.mli index c34200ce1..79dd35d75 100644 --- a/lib/CType.mli +++ b/lib/CType.mli @@ -32,6 +32,8 @@ val voidstar : t val word : t val quad : t val int32x4_t : t +val svbool_t : t +val svint32_t : t val pteval_t : t val pte : t val ins_t : t diff --git a/litmus/AArch64Arch_litmus.ml b/litmus/AArch64Arch_litmus.ml index af79fd059..7965ac73d 100644 --- a/litmus/AArch64Arch_litmus.ml +++ b/litmus/AArch64Arch_litmus.ml @@ -36,6 +36,10 @@ module Make(O:Arch_litmus.Config)(V:Constant.S) = struct | Internal i -> sprintf "i%i" i | Vreg (vr, _) | SIMDreg vr -> (try Misc.lowercase (List.assoc vr vvrs) with Not_found -> assert false) + | Preg (pr, _) | PMreg (pr, _) -> + (try Misc.lowercase (List.assoc pr pvrs) 
with Not_found -> assert false)
+ | Zreg (zr, _) ->
+ (try Misc.lowercase (List.assoc zr vvrs) with Not_found -> assert false) (* Intentionally 'vvrs' instead of 'zvrs' *)
| _ ->
try Misc.lowercase (Hashtbl.find tab r) with Not_found -> assert false
@@ -62,18 +66,23 @@ module Make(O:Arch_litmus.Config)(V:Constant.S) = struct
in
match r with
| (Vreg _ | SIMDreg _ ) -> Some (sprintf "vdupq_n_s32(%s)" v, "int32x4_t")
+ | Zreg _ -> Some (sprintf "svdup_s32(%s)" v, "svint32_t")
+ | Preg _ | PMreg _ -> Some (sprintf "svdup_b32(%s)" v, "svbool_t")
| _ -> None
let reg_class reg = match reg with
- | Vreg _ | SIMDreg _ -> "=&w"
+ | Vreg _ | SIMDreg _ | Zreg _ -> "=&w"
+ | Preg _ | PMreg _ -> "=&Upa"
| _ -> "=&r"
let reg_class_stable reg = match reg with
(* Certain Neon instructions do not affect the whole register, so we need to
guarantee that unaffected parts are initialized to zero which basically means
that we need to initialize whole register to zero. Several options have been
evaluated and it seems the only robust way to achieve that is using the
- constraint "+" and the explicit initialization of the 'stable_*' variables *)
- | Vreg _ | SIMDreg _ -> "+w"
+ constraint "+" and the explicit initialization of the 'stable_*' variables.
+ The same applies to SVE instructions with a P/M (merging) predicate *)
+ | Vreg _ | SIMDreg _ | Zreg _ -> "+w"
+ | Preg _ | PMreg _ -> "=Upa"
| _ -> "=r"
let comment = comment
diff --git a/litmus/AArch64Compile_litmus.ml b/litmus/AArch64Compile_litmus.ml
index fdec62fc6..ab9f0c9b0 100644
--- a/litmus/AArch64Compile_litmus.ml
+++ b/litmus/AArch64Compile_litmus.ml
@@ -49,6 +49,14 @@ module Make(V:Constant.S)(C:Config) =
let stable_regs _ins = match _ins with
| I_LDAP1 (rs,_,_,_)
+ | I_LD1SP (_,rs,_,_,_)
+ | I_LD2SP (_,rs,_,_,_)
+ | I_LD3SP (_,rs,_,_,_)
+ | I_LD4SP (_,rs,_,_,_)
+ | I_ST1SP (_,rs,_,_,_)
+ | I_ST2SP (_,rs,_,_,_)
+ | I_ST3SP (_,rs,_,_,_)
+ | I_ST4SP (_,rs,_,_,_)
| I_LD1 (rs,_,_,_) | I_LD1M (rs,_,_)
| I_LD2 (rs,_,_,_) | I_LD2M (rs,_,_) | I_LD2R (rs,_,_)
| I_LD3 (rs,_,_,_) | I_LD3M (rs,_,_) | I_LD3R (rs,_,_)
@@ -60,6 +68,15 @@ module Make(V:Constant.S)(C:Config) =
A.RegSet.of_list rs
| I_MOV_FG (r,_,_,_) -> A.RegSet.of_list [r]
| I_MOV_VE (r,_,_,_) -> A.RegSet.of_list [r]
+ (* To allow viewing a scalable register via its Neon counterpart *)
+ | I_ADD_SV (r,_,_)
+ | I_DUP_SV (r,_,_)
+ | I_MOV_SV (r,_,_)
+ | I_INDEX_SI (r,_,_,_) | I_INDEX_IS (r,_,_,_) | I_INDEX_SS (r,_,_,_) | I_INDEX_II (r,_,_)
+ (* These take a P/M (merging) predicate, so assume a partial update of the register *)
+ | I_NEG_SV (r,_,_)
+ | I_MOVPRFX (r,_,_)
+ -> A.RegSet.of_list [r]
| _ -> A.RegSet.empty
(* Generic funs for zr *)
@@ -135,6 +152,8 @@ module Make(V:Constant.S)(C:Config) =
let add_p = add_type voidstar (* pointer *)
let add_128 = add_type int32x4_t
let add_v v = v2type v |> add_type
+ let add_svbool_t = add_type svbool_t
+ let add_svint32_t = add_type svint32_t
(* pretty prints barrel shifters *)
let pp_shifter = function
@@ -305,6 +324,7 @@ module Make(V:Constant.S)(C:Config) =
inputs=[rA]@i; outputs=[rD];
reg_env=add_v v i@[(rA,voidstar); (rD,td);]; }
+ | _ -> assert false
let load memo v = match v with
@@ -514,6 +534,7 @@ module Make(V:Constant.S)(C:Config) =
memo fA fB fC (pp_mem_sext se) k;
inputs=rA@[rB]@rC;
reg_env=[(rB,voidstar)]@add_v v rA@add_v vC rC; }
+ | _ -> assert false
let stxr memo v r1 r2 r3 = match v with
| V32 ->
@@ -918,6 +939,319 @@ module Make(V:Constant.S)(C:Config) =
outputs = [r1];
reg_env = (add_128 [r1;r2;r3;])}
+(* Scalable Vector Extension *)
+ let pp_pattern p = A.pp_pattern p |>
Misc.lowercase + + let print_preg io offset i r = match r with + | Preg (_,s) -> "^" ^ io ^ string_of_int (i+offset) ^ + (pp_sve_arrange_specifier s |> Misc.lowercase) + | PMreg (_,m) -> "^" ^ io ^ string_of_int (i+offset) ^ + (pp_sve_pred_modifier m |> Misc.lowercase) + | _ -> assert false + + let print_zreg io offset i r = match r with + | Zreg (_,s) -> "^" ^ io ^ string_of_int (i+offset) ^ + (pp_sve_arrange_specifier s |> Misc.lowercase) + | _ -> assert false + + let while_op memo r1 v r2 r3 = + { empty_ins with + memo = sprintf "%s %s,%s" + memo + (print_preg "o" 0 0 r1) + (match v with | V32 -> "^wi0, ^wi1" | V64 -> "^i0, ^i1" | V128 -> assert false); + inputs = [r2;r3;]; + outputs = [r1]; + reg_env = (add_svbool_t [r1;])@(add_w [r2;r3])} + + let uaddv v r1 r2 r3 = + { empty_ins with + memo = sprintf "uaddv %s,%s,%s" + (print_vecreg v "o" 0) + (print_preg "i" 0 0 r2) + (print_zreg "i" 0 1 r3); + inputs = [r2;r3;]; + outputs = [r1]; + reg_env = (add_128 [r1;])@(add_svbool_t [r2;])@(add_svint32_t [r3;])} + + let ldnsp memo v rs pg rA idx = + let open MemExt in + match idx with + | Imm (0,Idx) -> + { empty_ins with + memo = sprintf "%s%s {%s},%s,[^i1]" + memo + (Misc.lowercase (pp_simd_variant v)) + (String.concat "," (List.mapi (print_zreg "o" 0) rs)) + (print_preg "i" 0 0 pg); + inputs = [pg;rA]; + outputs = rs; + reg_env = (add_svint32_t rs)@(add_svbool_t [pg;])@[(rA,voidstar)]} + | Imm (k,Idx) -> + { empty_ins with + memo = sprintf "%s%s {%s},%s,[^i1,#%i,mul vl]" + memo + (Misc.lowercase (pp_simd_variant v)) + (String.concat "," (List.mapi (print_zreg "o" 0) rs)) + (print_preg "i" 0 0 pg) + k; + inputs = [pg;rA]; + outputs = rs; + reg_env = (add_svint32_t rs)@(add_svbool_t [pg;])@[(rA,voidstar)]} + | Reg (V64,rM,LSL,0) -> + { empty_ins with + memo = sprintf "%s%s {%s},%s,[^i1,^i2]" + memo + (Misc.lowercase (pp_simd_variant v)) + (String.concat "," (List.mapi (print_zreg "o" 0) rs)) + (print_preg "i" 0 0 pg); + inputs = [pg;rA;rM]; + outputs = rs; + reg_env = (add_svint32_t rs)@(add_svbool_t [pg;])@[(rA,voidstar)]@(add_q [rM;])} + | Reg (V64,rM,LSL,k) -> + { empty_ins with + memo = sprintf "%s%s {%s},%s,[^i1,^i2,LSL #%d]" + memo + (Misc.lowercase (pp_simd_variant v)) + (String.concat "," (List.mapi (print_zreg "o" 0) rs)) + (print_preg "i" 0 0 pg) + k; + inputs = [pg;rA;rM]; + outputs = rs; + reg_env = (add_svint32_t rs)@(add_svbool_t [pg;])@[(rA,voidstar)]@(add_q [rM;])} + | ZReg(rM,LSL,0) -> + let r = List.hd rs in + { empty_ins with + memo = sprintf "%s%s {%s},%s,[^i1,%s]" + memo + (Misc.lowercase (pp_simd_variant v)) + (print_zreg "o" 0 0 r) + (print_preg "i" 0 0 pg) + (print_zreg "i" 0 2 rM); + inputs = [pg;rA;rM]; + outputs = [r]; + reg_env = (add_svint32_t [r;rM])@(add_svbool_t [pg;])@[(rA,voidstar)]} + | ZReg(rM,se,0) -> + let r = List.hd rs in + { empty_ins with + memo = sprintf "%s%s {%s},%s,[^i1,%s,%s]" + memo + (Misc.lowercase (pp_simd_variant v)) + (print_zreg "o" 0 0 r) + (print_preg "i" 0 0 pg) + (print_zreg "i" 0 2 rM) + (pp_mem_sext se); + inputs = [pg;rA;rM]; + outputs = [r]; + reg_env = (add_svint32_t [r;rM])@(add_svbool_t [pg;])@[(rA,voidstar)]} + | ZReg(rM,se,k) -> + let r = List.hd rs in + { empty_ins with + memo = sprintf "%s%s {%s},%s,[^i1,%s,%s #%d]" + memo + (Misc.lowercase (pp_simd_variant v)) + (print_zreg "o" 0 0 r) + (print_preg "i" 0 0 pg) + (print_zreg "i" 0 2 rM) + (pp_mem_sext se) + k; + inputs = [pg;rA;rM]; + outputs = [r]; + reg_env = (add_svint32_t [r;rM])@(add_svbool_t [pg;])@[(rA,voidstar)]} + | _ -> assert false + + let stnsp memo v rs pg rA idx 
= + let open MemExt in + match idx with + | Imm (0,Idx) -> + { empty_ins with + memo = sprintf "%s%s {%s},%s,[^i1]" + memo + (Misc.lowercase (pp_simd_variant v)) + (String.concat "," (List.mapi (print_zreg "i" 2) rs)) + (print_preg "i" 0 0 pg); + inputs = [pg;rA]@rs; + outputs = []; + reg_env = (add_svint32_t rs)@(add_svbool_t [pg;])@[(rA,voidstar)]} + | Imm (k,Idx) -> + { empty_ins with + memo = sprintf "%s%s {%s},%s,[^i1,#%i,mul vl]" + memo + (Misc.lowercase (pp_simd_variant v)) + (String.concat "," (List.mapi (print_zreg "i" 2) rs)) + (print_preg "i" 0 0 pg) + k; + inputs = [pg;rA]@rs; + outputs = []; + reg_env = (add_svint32_t rs)@(add_svbool_t [pg;])@[(rA,voidstar)]} + | Reg (V64,rM,LSL,0) -> + { empty_ins with + memo = sprintf "%s%s {%s},%s,[^i1,^i2]" + memo + (Misc.lowercase (pp_simd_variant v)) + (String.concat "," (List.mapi (print_zreg "i" 3) rs)) + (print_preg "i" 0 0 pg); + inputs = [pg;rA;rM]@rs; + outputs = []; + reg_env = (add_svint32_t rs)@(add_svbool_t [pg;])@[(rA,voidstar)]@(add_q [rM;])} + | Reg (V64,rM,LSL,k) -> + { empty_ins with + memo = sprintf "%s%s {%s},%s,[^i1,^i2,LSL #%d]" + memo + (Misc.lowercase (pp_simd_variant v)) + (String.concat "," (List.mapi (print_zreg "i" 3) rs)) + (print_preg "i" 0 0 pg) + k; + inputs = [pg;rA;rM]@rs; + outputs = []; + reg_env = (add_svint32_t rs)@(add_svbool_t [pg;])@[(rA,voidstar)]@(add_q [rM;])} + | ZReg(rM,LSL,0) -> + let r = List.hd rs in + { empty_ins with + memo = sprintf "%s%s {%s},%s,[^i2,%s]" + memo + (Misc.lowercase (pp_simd_variant v)) + (print_zreg "i" 0 0 r) + (print_preg "i" 0 1 pg) + (print_zreg "i" 0 3 rM); + inputs = [r;pg;rA;rM]; + outputs = []; + reg_env = (add_svint32_t [r;rM])@(add_svbool_t [pg;])@[(rA,voidstar)]} + | ZReg(rM,se,0) -> + let r = List.hd rs in + { empty_ins with + memo = sprintf "%s%s {%s},%s,[^i2,%s,%s]" + memo + (Misc.lowercase (pp_simd_variant v)) + (print_zreg "i" 0 0 r) + (print_preg "i" 0 1 pg) + (print_zreg "i" 0 3 rM) + (pp_mem_sext se); + inputs = [r;pg;rA;rM]; + outputs = []; + reg_env = (add_svint32_t [r;rM])@(add_svbool_t [pg;])@[(rA,voidstar)]} + | ZReg(rM,se,k) -> + let r = List.hd rs in + { empty_ins with + memo = sprintf "%s%s {%s},%s,[^i2,%s,%s #%d]" + memo + (Misc.lowercase (pp_simd_variant v)) + (print_zreg "i" 0 0 r) + (print_preg "i" 0 1 pg) + (print_zreg "i" 0 3 rM) + (pp_mem_sext se) + k; + inputs = [r;pg;rA;rM]; + outputs = []; + reg_env = (add_svint32_t [r;rM])@(add_svbool_t [pg;])@[(rA,voidstar)]} + | _ -> assert false + + let mov_sv r k s = match s with + | S_NOEXT -> + { empty_ins with + memo = sprintf "mov %s,#%i" (print_zreg "o" 0 0 r) k; + inputs = []; + outputs = [r;]; + reg_env = (add_svint32_t [r])} + | S_LSL(ks) -> + { empty_ins with + memo = sprintf "mov %s,#%i,%s" (print_zreg "o" 0 0 r) k (pp_shifter (S_LSL ks)); + inputs = []; + outputs = [r;]; + reg_env = (add_svint32_t [r])} + | _ -> assert false + + let dup_sv_v r1 v r2 = + { empty_ins with + memo = sprintf "dup %s,%s" + (print_zreg "o" 0 0 r1) + (match v with | V32 -> "^wi0" | V64 -> "^i0" | V128 -> assert false); + inputs = [r2]; + outputs = [r1]; + reg_env = (add_svint32_t [r1;]) @ (( + match v with + | V32 -> add_w + | V64 -> add_q + | V128 -> assert false) [r2;])} + + let add_sv r1 r2 r3 = + { empty_ins with + memo = sprintf "add %s,%s,%s" (print_zreg "o" 0 0 r1) (print_zreg "i" 0 0 r2) (print_zreg "i" 0 1 r3); + inputs = [r2;r3]; + outputs = [r1]; + reg_env = (add_svint32_t [r1;r2;r3;])} + + let index_si r1 v r2 k = + { empty_ins with + memo = sprintf "index %s,%s,#%i" + (print_zreg "o" 0 0 r1) + (match 
v with | V32 -> "^wi0" | V64 -> "^i0" | V128 -> assert false) + k; + inputs = [r2]; + outputs = [r1]; + reg_env = (add_svint32_t [r1;]) @ (( + match v with + | V32 -> add_w + | V64 -> add_q + | V128 -> assert false) [r2;])} + + let index_is r1 v k r2 = + { empty_ins with + memo = sprintf "index %s,#%i,%s" + (print_zreg "o" 0 0 r1) + k + (match v with | V32 -> "^wi0" | V64 -> "^i0" | V128 -> assert false); + inputs = [r2]; + outputs = [r1]; + reg_env = (add_svint32_t [r1;]) @ (( + match v with + | V32 -> add_w + | V64 -> add_q + | V128 -> assert false) [r2;])} + + let index_ss r1 v r2 r3 = + { empty_ins with + memo = sprintf "index %s,%s" + (print_zreg "o" 0 0 r1) + (match v with | V32 -> "^wi0,^wi1" | V64 -> "^i0,^i1" | V128 -> assert false); + inputs = [r2;r3]; + outputs = [r1]; + reg_env = (add_svint32_t [r1;]) @ (( + match v with + | V32 -> add_w + | V64 -> add_q + | V128 -> assert false) [r2;r3])} + + + let index_ii r1 k1 k2 = + { empty_ins with + memo = sprintf "index %s,#%i,#%i" + (print_zreg "o" 0 0 r1) k1 k2; + inputs = []; + outputs = [r1]; + reg_env = (add_svint32_t [r1;])} + + let ptrue pred pattern = + { empty_ins with + memo = sprintf "ptrue %s,%s" (print_preg "o" 0 0 pred) (pp_pattern pattern); + inputs = []; + outputs = [pred]; + reg_env = (add_svbool_t [pred;])} + + let neg_sv r1 pg r3 = + { empty_ins with + memo = sprintf "neg %s,%s,%s" (print_zreg "o" 0 0 r1) (print_preg "i" 0 1 pg) (print_zreg "i" 0 2 r3); + inputs = [r1;pg;r3]; (* r1 is intentionally in 'inputs', see comment for reg_class_stable *) + outputs = [r1]; + reg_env = (add_svint32_t [r1;r3;])@(add_svbool_t [pg;])} + + let movprfx r1 pg r3 = + { empty_ins with + memo = sprintf "movprfx %s,%s,%s" (print_zreg "o" 0 0 r1) (print_preg "i" 0 1 pg) (print_zreg "i" 0 2 r3); + inputs = [r1;pg;r3]; (* r1 is intentionally in 'inputs', see comment for reg_class_stable *) + outputs = [r1]; + reg_env = (add_svint32_t [r1;r3;])@(add_svbool_t [pg;])} + (* Compare and swap *) let cas_memo rmw = Misc.lowercase (cas_memo rmw) @@ -1332,6 +1666,30 @@ module Make(V:Constant.S)(C:Config) = | I_EOR_SIMD (r1,r2,r3) -> eor_simd r1 r2 r3::k | I_ADD_SIMD (r1,r2,r3) -> add_simd r1 r2 r3::k | I_ADD_SIMD_S (r1,r2,r3) -> add_simd_s r1 r2 r3::k +(* Scalable Vector Extension Load and Store *) + | I_WHILELT (z,v,r1,r2) -> while_op "whilelt" z v r1 r2::k + | I_WHILELE (z,v,r1,r2) -> while_op "whilele" z v r1 r2::k + | I_WHILELO (z,v,r1,r2) -> while_op "whilelo" z v r1 r2::k + | I_WHILELS (z,v,r1,r2) -> while_op "whilels" z v r1 r2::k + | I_UADDV (v,r1,r2,r3) -> uaddv v r1 r2 r3::k + | I_LD1SP (v,rs,r2,r3,idx) -> ldnsp "ld1" v rs r2 r3 idx::k + | I_LD2SP (v,rs,r2,r3,idx) -> ldnsp "ld2" v rs r2 r3 idx::k + | I_LD3SP (v,rs,r2,r3,idx) -> ldnsp "ld3" v rs r2 r3 idx::k + | I_LD4SP (v,rs,r2,r3,idx) -> ldnsp "ld4" v rs r2 r3 idx::k + | I_ST1SP (v,rs,r2,r3,idx) -> stnsp "st1" v rs r2 r3 idx::k + | I_ST2SP (v,rs,r2,r3,idx) -> stnsp "st2" v rs r2 r3 idx::k + | I_ST3SP (v,rs,r2,r3,idx) -> stnsp "st3" v rs r2 r3 idx::k + | I_ST4SP (v,rs,r2,r3,idx) -> stnsp "st4" v rs r2 r3 idx::k + | I_MOV_SV (r,kr,s) -> mov_sv r kr s::k + | I_DUP_SV (r1,v,r2) -> dup_sv_v r1 v r2::k + | I_ADD_SV (r1,r2,r3) -> add_sv r1 r2 r3::k + | I_INDEX_SI (r1,v,r2,kr) -> index_si r1 v r2 kr::k + | I_INDEX_IS (r1,v,kr,r2) -> index_is r1 v kr r2::k + | I_INDEX_SS (r1,v,r2,r3) -> index_ss r1 v r2 r3::k + | I_INDEX_II (r1,k1,k2) -> index_ii r1 k1 k2::k + | I_PTRUE (pred,pat) -> ptrue pred pat::k + | I_NEG_SV (r1,r2,r3) -> neg_sv r1 r2 r3::k + | I_MOVPRFX (r1,r2,r3) -> movprfx r1 r2 r3::k (* 
Arithmetic *)
| I_MOV (v,r,K i) -> mov_const v r i::k
| I_MOV (v,r1,RV (_,r2)) -> movr v r1 r2::k
diff --git a/litmus/ASMLang.ml b/litmus/ASMLang.ml
index bb57b5fc4..d983d57e6 100644
--- a/litmus/ASMLang.ml
+++ b/litmus/ASMLang.ml
@@ -339,7 +339,7 @@ module RegMap = A.RegMap)
with Not_found -> Compile.base in
match ty with
| CType.Array _ when A.arch = `AArch64 ->
- fprintf chan "%s%s out_%s = %s;\n" indent
+ fprintf chan "%s%s out_%s = cast(%s);\n" indent
(CType.dump CType.int32x4_t) (dump_stable_reg reg)
(dump_stable_reg reg);
fprintf chan "%smemcpy(%s, &out_%s, sizeof(%s));\n" indent
(compile_out_reg proc reg) (dump_stable_reg reg) (CType.dump ty)
diff --git a/litmus/libdir/_aarch64/intrinsics.h b/litmus/libdir/_aarch64/intrinsics.h
index f603817d5..1859576b2 100644
--- a/litmus/libdir/_aarch64/intrinsics.h
+++ b/litmus/libdir/_aarch64/intrinsics.h
@@ -1,3 +1,13 @@
#ifdef __ARM_NEON
#include <arm_neon.h>
+#define cast(v) \
+({ \
+ typeof(v) __in = (v); \
+ int32x4_t __out; \
+ asm("" : "=w"(__out) : "0"(__in)); \
+ __out; \
+})
#endif /* __ARM_NEON */
+#ifdef __ARM_FEATURE_SVE
+#include <arm_sve.h>
+#endif /* __ARM_FEATURE_SVE */
diff --git a/litmus/libdir/armv8+sve-qemu.cfg b/litmus/libdir/armv8+sve-qemu.cfg
new file mode 100644
index 000000000..5362a3f7a
--- /dev/null
+++ b/litmus/libdir/armv8+sve-qemu.cfg
@@ -0,0 +1,15 @@
+-size_of_test = 10k
+number_of_run = 100
+avail = 4
+limit = true
+memory = direct
+stride = 1
+carch = AArch64
+barrier = userfence
+smt = 4
+smt_mode = seq
+affinity = incr0
+ccopts = -O2 -march=armv8-a+sve
+crossrun = qemu:qemu-aarch64
+gcc = aarch64-linux-gnu-gcc
+linkopt = -static
\ No newline at end of file
diff --git a/litmus/template.ml b/litmus/template.ml
index 75e9a2b10..3658c2745 100644
--- a/litmus/template.ml
+++ b/litmus/template.ml
@@ -349,7 +349,7 @@ module Make(O:Config)(A:I) =
let digit i =
let c = Char.code t.memo.[i] in
let n = c - Char.code '0' in
- if 0 <= n && n <= 5 then n
+ if 0 <= n && n <= 6 then n
else internal (sprintf "bad digit '%i' (%c)" n t.memo.[i])
and substring i j =