From 50927119ea9ebdb09642a8e15fbe1f7a7cba0502 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 15 Jan 2024 13:04:03 +0000 Subject: [PATCH] [all]: Add SVE predicated NEG and MOVPRFX instructions Note that both instructions accept P/M (aka merge predicate) which means that destination register can be updated partially thus we mark destination register both input and output in `litmus` Signed-off-by: Vladimir Murzin --- herd/AArch64Arch_herd.ml | 4 ++ herd/AArch64Sem.ml | 70 +++++++++++++++++++ .../tests/instructions/AArch64.sve/V19.litmus | 11 +++ .../AArch64.sve/V19.litmus.expected | 11 +++ .../tests/instructions/AArch64.sve/V20.litmus | 11 +++ .../AArch64.sve/V20.litmus.expected | 11 +++ .../tests/instructions/AArch64.sve/V21.litmus | 9 +++ .../AArch64.sve/V21.litmus.expected | 11 +++ jingle/AArch64Arch_jingle.ml | 2 +- lib/AArch64Base.ml | 18 ++++- lib/AArch64Lexer.mll | 1 + lib/AArch64Parser.mly | 5 ++ litmus/AArch64Compile_litmus.ml | 19 +++++ 13 files changed, 180 insertions(+), 3 deletions(-) create mode 100644 herd/tests/instructions/AArch64.sve/V19.litmus create mode 100644 herd/tests/instructions/AArch64.sve/V19.litmus.expected create mode 100644 herd/tests/instructions/AArch64.sve/V20.litmus create mode 100644 herd/tests/instructions/AArch64.sve/V20.litmus.expected create mode 100644 herd/tests/instructions/AArch64.sve/V21.litmus create mode 100644 herd/tests/instructions/AArch64.sve/V21.litmus.expected diff --git a/herd/AArch64Arch_herd.ml b/herd/AArch64Arch_herd.ml index 0acfe12822..784a95dc1a 100644 --- a/herd/AArch64Arch_herd.ml +++ b/herd/AArch64Arch_herd.ml @@ -116,6 +116,7 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) = | I_MOV_SV _ | I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _ | I_DUP_SV _ | I_ADD_SV _ | I_PTRUE _ + | I_NEG_SV _ | I_MOVPRFX _ -> true let is_cmodx_restricted_value = @@ -301,6 +302,7 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) = | I_WHILELT _ | I_WHILELE _ | I_WHILELO _ | I_WHILELS _ | I_UADDV _ | I_MOV_SV _ | I_DUP_SV _ | I_ADD_SV _ | I_PTRUE _ + | I_NEG_SV _ | I_MOVPRFX _ | I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _ -> None @@ -370,6 +372,7 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) = | I_UADDV (_,r,_,_) | I_MOV_SV (r,_,_) | I_DUP_SV (r,_,_) | I_ADD_SV (r,_,_) | I_PTRUE (r,_) + | I_NEG_SV (r,_,_) | I_MOVPRFX (r,_,_) | I_INDEX_SI (r,_,_,_) | I_INDEX_IS (r,_,_,_) | I_INDEX_SS (r,_,_,_) | I_INDEX_II (r,_,_) -> [r] | I_MSR (sr,_) @@ -446,6 +449,7 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) = | I_LD1SP _ | I_LD2SP _ | I_LD3SP _ | I_LD4SP _ | I_ST1SP _ | I_ST2SP _ | I_ST3SP _ | I_ST4SP _ | I_ADD_SV _ | I_PTRUE _ + | I_NEG_SV _ | I_MOVPRFX _ | I_MOV_SV _ | I_DUP_SV _ | I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _ -> MachSize.No diff --git a/herd/AArch64Sem.ml b/herd/AArch64Sem.ml index 206c025414..1a54214b6a 100644 --- a/herd/AArch64Sem.ml +++ b/herd/AArch64Sem.ml @@ -2203,6 +2203,70 @@ module Make let sz = AArch64Base.tr_simd_variant var in write_reg_neon_sz sz r1 v ii + let movprfx dst pg src ii = + let nelem = predicate_nelem src in + let psize = predicate_psize src in + let esize = scalable_esize dst in + let orig = match pg with + | AArch64Base.PMreg (_,AArch64Base.Zero) + -> mzero + | AArch64Base.PMreg (_,AArch64Base.Merge) + -> read_reg_scalable false dst ii + | _ -> assert false + in + orig >>| + read_reg_predicate false pg ii >>= fun (orig,pred) -> + get_predicate_any pred psize nelem >>= fun any -> + M.choiceT + any + (read_reg_scalable false src ii >>= fun src -> + let copy orig cur_val idx = + get_predicate_last pred psize idx >>= fun last -> + M.choiceT + last + (scalable_getlane cur_val idx esize) + (scalable_getlane orig idx esize) + >>= fun v -> + scalable_setlane cur_val idx esize v + in + let rec reduce orig n op = + match n with + | 0 -> op >>= fun old_val -> copy orig old_val n + | _ -> reduce orig (n-1) (op >>= fun old_val -> copy orig old_val n) + in + reduce orig (nelem-1) (M.unitT src)) + (M.unitT orig) >>= fun v -> + write_reg_scalable dst v ii + + let neg dst pg src ii = + let nelem = predicate_nelem src in + let psize = predicate_psize src in + let esize = scalable_esize dst in + read_reg_scalable false dst ii >>| + read_reg_predicate false pg ii >>= fun (orig,pred) -> + get_predicate_any pred psize nelem >>= fun any -> + M.choiceT + any + (read_reg_scalable false src ii >>= fun src -> + let negate orig cur_val idx = + get_predicate_last pred psize idx >>= fun last -> + M.choiceT + last + (scalable_getlane cur_val idx esize >>= fun v -> + M.op Op.Sub V.zero v) + (scalable_getlane orig idx esize) + >>= fun v -> + scalable_setlane cur_val idx esize v + in + let rec reduce orig n op = + match n with + | 0 -> op >>= fun old_val -> negate orig old_val n + | _ -> reduce orig (n-1) (op >>= fun old_val -> negate orig old_val n) + in + reduce orig (nelem-1) (M.unitT src)) + (M.unitT orig) >>= fun v -> + write_reg_scalable dst v ii + let while_op compare unsigned p var r1 r2 ii = let psize = predicate_psize p in let nelem = predicate_nelem p in @@ -3223,6 +3287,12 @@ module Make | I_UADDV(var,v,p,z) -> check_sve inst; !(uaddv var v p z ii) + | I_MOVPRFX(r1,pg,r2) -> + check_sve inst; + !(movprfx r1 pg r2 ii) + | I_NEG_SV(r1,pg,r2) -> + check_sve inst; + !(neg r1 pg r2 ii) | I_MOV_SV(r,k,shift) -> check_sve inst; !(mov_sv r k shift ii) diff --git a/herd/tests/instructions/AArch64.sve/V19.litmus b/herd/tests/instructions/AArch64.sve/V19.litmus new file mode 100644 index 0000000000..9d3f903966 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V19.litmus @@ -0,0 +1,11 @@ +AArch64 V19 +{} +P0 ; +MOV Z0.S,#1 ; +MOV Z1.S,#2 ; +MOV Z2.S,#-1 ; +PTRUE P0.S,VL2 ; +MOVPRFX Z1.S,P0/Z,Z0.S ; +NEG Z1.S,P0/M,Z2.S ; + +forall 0:V1.4S={1,1,0,0} diff --git a/herd/tests/instructions/AArch64.sve/V19.litmus.expected b/herd/tests/instructions/AArch64.sve/V19.litmus.expected new file mode 100644 index 0000000000..c24d4df87f --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V19.litmus.expected @@ -0,0 +1,11 @@ +Test V19 Required +States 1 +0:V1.4S={1,1,0,0}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V1.4S={1,1,0,0}) +Observation V19 Always 1 0 +Hash=2a7dcf8717c679858b3d9b0347124aca + diff --git a/herd/tests/instructions/AArch64.sve/V20.litmus b/herd/tests/instructions/AArch64.sve/V20.litmus new file mode 100644 index 0000000000..933e2331f2 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V20.litmus @@ -0,0 +1,11 @@ +AArch64 V20 +{} +P0 ; +MOV Z0.S,#1 ; +MOV Z1.S,#2 ; +MOV Z2.S,#-1 ; +PTRUE P0.S,VL3 ; +MOVPRFX Z1.S,P0/M,Z0.S ; +NEG Z1.S,P0/M,Z2.S ; + +forall 0:V1.4S={1,1,1,2} diff --git a/herd/tests/instructions/AArch64.sve/V20.litmus.expected b/herd/tests/instructions/AArch64.sve/V20.litmus.expected new file mode 100644 index 0000000000..a7631e3525 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V20.litmus.expected @@ -0,0 +1,11 @@ +Test V20 Required +States 1 +0:V1.4S={1,1,1,2}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V1.4S={1,1,1,2}) +Observation V20 Always 1 0 +Hash=f5073ff7daa0f2b6acb6dea49f4fc61a + diff --git a/herd/tests/instructions/AArch64.sve/V21.litmus b/herd/tests/instructions/AArch64.sve/V21.litmus new file mode 100644 index 0000000000..27443c6bb7 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V21.litmus @@ -0,0 +1,9 @@ +AArch64 V21 +{} +P0 ; +MOV Z0.S,#-1 ; +MOV Z1.S,#2 ; +PTRUE P0.S,VL3 ; +NEG Z1.S,P0/M,Z0.S ; + +forall 0:V1.4S={1,1,1,2} diff --git a/herd/tests/instructions/AArch64.sve/V21.litmus.expected b/herd/tests/instructions/AArch64.sve/V21.litmus.expected new file mode 100644 index 0000000000..1c1af977eb --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V21.litmus.expected @@ -0,0 +1,11 @@ +Test V21 Required +States 1 +0:V1.4S={1,1,1,2}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V1.4S={1,1,1,2}) +Observation V21 Always 1 0 +Hash=b2c63565af19e50740733f57fc3bf64a + diff --git a/jingle/AArch64Arch_jingle.ml b/jingle/AArch64Arch_jingle.ml index 9efe7568d2..9dc51e3c19 100644 --- a/jingle/AArch64Arch_jingle.ml +++ b/jingle/AArch64Arch_jingle.ml @@ -667,7 +667,7 @@ include Arch.MakeArch(struct -> Warn.fatal "Neon instructions are not implemented yet" (* Scalable Vector Extension *) | I_WHILELT _ | I_WHILELE _ | I_WHILELO _ | I_WHILELS _ - | I_UADDV _ | I_DUP_SV _ | I_ADD_SV _ + | I_UADDV _ | I_DUP_SV _ | I_ADD_SV _ | I_NEG_SV _ | I_MOVPRFX _ | I_LD1SP _ | I_LD2SP _ | I_LD3SP _ | I_LD4SP _ | I_ST1SP _ | I_ST2SP _ | I_ST3SP _ | I_ST4SP _ | I_MOV_SV _ | I_PTRUE _ diff --git a/lib/AArch64Base.ml b/lib/AArch64Base.ml index b919cf3cb6..6ed807e118 100644 --- a/lib/AArch64Base.ml +++ b/lib/AArch64Base.ml @@ -1471,6 +1471,10 @@ type 'k kinstruction = | I_DUP_SV of reg * variant * reg (* ADD ., ., . *) | I_ADD_SV of reg * reg * reg + (* NEG ., /M, . *) + | I_NEG_SV of reg * reg * reg + (* MOVPRFX ., /, . *) + | I_MOVPRFX of reg * reg * reg (* INDEX ., , # *) | I_INDEX_SI of reg * variant * reg * 'k (* INDEX ., #, *) @@ -1994,6 +1998,10 @@ let do_pp_instruction m = sprintf "DUP %s,%s" (pp_zreg r1) (pp_vreg v r2) | I_ADD_SV (r1,r2,r3) -> sprintf "ADD %s,%s,%s" (pp_zreg r1) (pp_zreg r2) (pp_zreg r3) + | I_NEG_SV (r1,r2,r3) -> + sprintf "NEG %s,%s,%s" (pp_zreg r1) (pp_preg r2) (pp_zreg r3) + | I_MOVPRFX (r1,r2,r3) -> + sprintf "MOVPRFX %s,%s,%s" (pp_zreg r1) (pp_preg r2) (pp_zreg r3) | I_INDEX_SI (r1,v,r2,k) -> sprintf "INDEX %s,%s,%s" (pp_zreg r1) (pp_vreg v r2) (m.pp_k k) | I_INDEX_IS (r1,v,k,r2) -> @@ -2326,7 +2334,7 @@ let fold_regs (f_regs,f_sregs) = | I_LDXP (_,_,r1,r2,r3) | I_WHILELT (r1,_,r2,r3) | I_WHILELE (r1,_,r2,r3) | I_WHILELO (r1,_,r2,r3) | I_WHILELS (r1,_,r2,r3) | I_INDEX_SS(r1,_,r2,r3) - | I_UADDV (_,r1,r2,r3) | I_ADD_SV (r1,r2,r3) + | I_UADDV (_,r1,r2,r3) | I_ADD_SV (r1,r2,r3) | I_NEG_SV (r1,r2,r3) | I_MOVPRFX (r1,r2,r3) -> fold_reg r1 (fold_reg r2 (fold_reg r3 c)) | I_LDP (_,_,r1,r2,r3,_) | I_LDPSW (r1,r2,r3,_) @@ -2562,6 +2570,10 @@ let map_regs f_reg f_symb = I_DUP_SV (map_reg r1,v,map_reg r2) | I_ADD_SV (r1,r2,r3) -> I_ADD_SV (map_reg r1,map_reg r2,map_reg r3) + | I_NEG_SV (r1,r2,r3) -> + I_NEG_SV (map_reg r1,map_reg r2,map_reg r3) + | I_MOVPRFX (r1,r2,r3) -> + I_MOVPRFX (map_reg r1,map_reg r2,map_reg r3) | I_INDEX_SI (r1,v,r2,k) -> I_INDEX_SI (map_reg r1,v,map_reg r2,k) | I_INDEX_IS (r1,v,k,r2) -> @@ -2788,7 +2800,7 @@ let get_next = | I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _ | I_LD1SP _ | I_LD2SP _ | I_LD3SP _ | I_LD4SP _ | I_ST1SP _ | I_ST2SP _ | I_ST3SP _ | I_ST4SP _ - | I_MOV_SV _ | I_ADD_SV _ + | I_MOV_SV _ | I_ADD_SV _ | I_NEG_SV _ | I_MOVPRFX _ -> [Label.Next;] (* Check instruction validity, beyond parsing *) @@ -3025,6 +3037,7 @@ module PseudoI = struct | I_WHILELT _ | I_WHILELE _ | I_WHILELO _| I_WHILELS _ | I_UADDV _ | I_DUP_SV _ | I_PTRUE _ | I_ADD_SV _ | I_INDEX_SS _ + | I_NEG_SV _ | I_MOVPRFX _ as keep -> keep | I_LDR (v,r1,r2,idx) -> I_LDR (v,r1,r2,ext_tr idx) | I_LDRSW (r1,r2,idx) -> I_LDRSW (r1,r2,ext_tr idx) @@ -3196,6 +3209,7 @@ module PseudoI = struct | I_WHILELT _ | I_WHILELE _ | I_WHILELO _ | I_WHILELS _ | I_PTRUE _ | I_ADD_SV _ | I_UADDV _ | I_DUP_SV _ + | I_NEG_SV _ | I_MOVPRFX _ | I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _ | I_MOV_SV _ -> 0 diff --git a/lib/AArch64Lexer.mll b/lib/AArch64Lexer.mll index b64544f447..033d972d40 100644 --- a/lib/AArch64Lexer.mll +++ b/lib/AArch64Lexer.mll @@ -204,6 +204,7 @@ match name with | "mul4" | "MUL4" -> TOK_MUL4 | "mul3" | "MUL3" -> TOK_MUL3 | "all" | "ALL" -> TOK_ALL +| "movprfx" | "MOVPRFX" -> MOVPRFX (* Compare and swap *) | "cas"|"CAS" -> CAS | "casa"|"CASA" -> CASA diff --git a/lib/AArch64Parser.mly b/lib/AArch64Parser.mly index 46f07c6a4f..07ee026ba0 100644 --- a/lib/AArch64Parser.mly +++ b/lib/AArch64Parser.mly @@ -112,6 +112,7 @@ let check_op3 op e = %token TOK_ALL TOK_POW2 TOK_MUL3 TOK_MUL4 %token TOK_VL1 TOK_VL2 TOK_VL3 TOK_VL4 TOK_VL5 TOK_VL6 TOK_VL7 TOK_VL8 %token TOK_VL16 TOK_VL32 TOK_VL64 TOK_VL128 TOK_VL256 +%token MOVPRFX %token LD1B LD1H LD1W LD1D LD2B LD2H LD2W LD2D LD3B LD3H LD3W LD3D LD4B LD4H LD4W LD4D %token ST1B ST1H ST1W ST1D ST2B ST2H ST2W ST2D ST3B ST3H ST3W ST3D ST4B ST4H ST4W ST4D /* @@ -1199,6 +1200,10 @@ instr: { I_INDEX_II ($2,$4,$6) } | PTRUE preg_sz pattern { I_PTRUE ($2,$3) } +| TOK_NEG zreg COMMA pmreg_m COMMA zreg + { I_NEG_SV ($2,$4,$6) } +| MOVPRFX zreg COMMA pmreg COMMA zreg + { I_MOVPRFX ($2,$4,$6) } /* Compare and swap */ | CAS wreg COMMA wreg COMMA LBRK cxreg zeroopt RBRK { I_CAS (V32,RMW_P,$2,$4,$7) } diff --git a/litmus/AArch64Compile_litmus.ml b/litmus/AArch64Compile_litmus.ml index 4fa135bf6d..04ef82b2b2 100644 --- a/litmus/AArch64Compile_litmus.ml +++ b/litmus/AArch64Compile_litmus.ml @@ -73,6 +73,9 @@ module Make(V:Constant.S)(C:Config) = | I_DUP_SV (r,_,_) | I_MOV_SV (r,_,_) | I_INDEX_SI (r,_,_,_) | I_INDEX_IS (r,_,_,_) | I_INDEX_SS (r,_,_,_) | I_INDEX_II (r,_,_) + (* Accept P/M register so assume partial update of register *) + | I_NEG_SV (r,_,_) + | I_MOVPRFX (r,_,_) -> A.RegSet.of_list [r] | _ -> A.RegSet.empty @@ -1252,6 +1255,20 @@ module Make(V:Constant.S)(C:Config) = outputs = [pred]; reg_env = (add_svbool_t [pred;])} + let neg_sv r1 pg r3 = + { empty_ins with + memo = sprintf "neg %s,%s,%s" (print_zreg "o" 0 0 r1) (print_preg "i" 0 1 pg) (print_zreg "i" 0 2 r3); + inputs = [r1;pg;r3]; (* r1 is intentionally in 'inputs', see comment for reg_class_stable *) + outputs = [r1]; + reg_env = (add_svint32_t [r1;r3;])@(add_svbool_t [pg;])} + + let movprfx r1 pg r3 = + { empty_ins with + memo = sprintf "movprfx %s,%s,%s" (print_zreg "o" 0 0 r1) (print_preg "i" 0 1 pg) (print_zreg "i" 0 2 r3); + inputs = [r1;pg;r3]; (* r1 is intentionally in 'inputs', see comment for reg_class_stable *) + outputs = [r1]; + reg_env = (add_svint32_t [r1;r3;])@(add_svbool_t [pg;])} + (* Compare and swap *) let cas_memo rmw = Misc.lowercase (cas_memo rmw) @@ -1688,6 +1705,8 @@ module Make(V:Constant.S)(C:Config) = | I_INDEX_SS (r1,v,r2,r3) -> index_ss r1 v r2 r3::k | I_INDEX_II (r1,k1,k2) -> index_ii r1 k1 k2::k | I_PTRUE (pred,pat) -> ptrue pred pat::k + | I_NEG_SV (r1,r2,r3) -> neg_sv r1 r2 r3::k + | I_MOVPRFX (r1,r2,r3) -> movprfx r1 r2 r3::k (* Arithmetic *) | I_MOV (v,r,K i) -> mov_const v r i::k | I_MOV (v,r1,RV (_,r2)) -> movr v r1 r2::k