From 3f67773e0994a4b12426634e1fb0f519a6b091d9 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 15 Jan 2024 13:04:03 +0000 Subject: [PATCH] [all]: Add SVE predicated NEG and MOVPRFX instructions Note that both instructions accept P/M (aka merge predicate) which means that destination register can be updated partially thus we mark destination register both input and output in `litmus` Signed-off-by: Vladimir Murzin --- herd/AArch64Arch_herd.ml | 8 +-- herd/AArch64Sem.ml | 70 +++++++++++++++++++ .../tests/instructions/AArch64.sve/V19.litmus | 9 +++ .../AArch64.sve/V19.litmus.expected | 11 +++ .../tests/instructions/AArch64.sve/V20.litmus | 10 +++ .../AArch64.sve/V20.litmus.expected | 11 +++ .../tests/instructions/AArch64.sve/V21.litmus | 10 +++ .../AArch64.sve/V21.litmus.expected | 11 +++ jingle/AArch64Arch_jingle.ml | 2 +- lib/AArch64Base.ml | 20 ++++-- lib/AArch64Lexer.mll | 1 + lib/AArch64Parser.mly | 6 +- litmus/AArch64Compile_litmus.ml | 19 +++++ 13 files changed, 178 insertions(+), 10 deletions(-) create mode 100644 herd/tests/instructions/AArch64.sve/V19.litmus create mode 100644 herd/tests/instructions/AArch64.sve/V19.litmus.expected create mode 100644 herd/tests/instructions/AArch64.sve/V20.litmus create mode 100644 herd/tests/instructions/AArch64.sve/V20.litmus.expected create mode 100644 herd/tests/instructions/AArch64.sve/V21.litmus create mode 100644 herd/tests/instructions/AArch64.sve/V21.litmus.expected diff --git a/herd/AArch64Arch_herd.ml b/herd/AArch64Arch_herd.ml index e69a34975..a67e93e93 100644 --- a/herd/AArch64Arch_herd.ml +++ b/herd/AArch64Arch_herd.ml @@ -115,7 +115,7 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) = | I_ST1SP _ | I_ST2SP _ | I_ST3SP _ | I_ST4SP _ | I_MOV_SV _ | I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _ - | I_DUP_SV _ | I_ADD_SV _ + | I_DUP_SV _ | I_ADD_SV _ | I_NEG_SV _ | I_MOVPRFX _ -> true let is_cmodx_restricted_value = @@ -300,7 +300,7 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) = | I_UDF _ | I_ADDSUBEXT _ | I_MOPL _ | I_WHILELT _ | I_WHILELE _ | I_WHILELO _ | I_WHILELS _ | I_UADDV _ - | I_MOV_SV _ | I_DUP_SV _ | I_ADD_SV _ + | I_MOV_SV _ | I_DUP_SV _ | I_ADD_SV _ | I_NEG_SV _ | I_MOVPRFX _ | I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _ -> None @@ -369,7 +369,7 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) = | I_WHILELT (r,_,_,_) | I_WHILELE (r,_,_,_) | I_WHILELO (r,_,_,_) | I_WHILELS (r,_,_,_) | I_UADDV (_,r,_,_) | I_MOV_SV (r,_,_) - | I_DUP_SV (r,_,_) | I_ADD_SV (r,_,_) + | I_DUP_SV (r,_,_) | I_ADD_SV (r,_,_) | I_NEG_SV (r,_,_) | I_MOVPRFX (r,_,_) | I_INDEX_SI (r,_,_,_) | I_INDEX_IS (r,_,_,_) | I_INDEX_SS (r,_,_,_) | I_INDEX_II (r,_,_) -> [r] | I_MSR (sr,_) @@ -445,7 +445,7 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) = | I_UADDV _ | I_LD1SP _ | I_LD2SP _ | I_LD3SP _ | I_LD4SP _ | I_ST1SP _ | I_ST2SP _ | I_ST3SP _ | I_ST4SP _ - | I_ADD_SV _ + | I_ADD_SV _ | I_NEG_SV _ | I_MOVPRFX _ | I_MOV_SV _ | I_DUP_SV _ | I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _ -> MachSize.No diff --git a/herd/AArch64Sem.ml b/herd/AArch64Sem.ml index f8a81cb82..a82124b92 100644 --- a/herd/AArch64Sem.ml +++ b/herd/AArch64Sem.ml @@ -2184,6 +2184,70 @@ module Make let sz = AArch64Base.tr_simd_variant var in write_reg_neon_sz sz r1 v ii + let movprfx dst pg src ii = + let nelem = predicate_nelem src in + let psize = predicate_psize src in + let esize = scalable_esize dst in + let orig = match pg with + | AArch64Base.PMreg (_,AArch64Base.Zero) + -> mzero + | AArch64Base.PMreg (_,AArch64Base.Merge) + -> read_reg_scalable false dst ii + | _ -> assert false + in + orig >>| + read_reg_predicate false pg ii >>= fun (orig,pred) -> + get_predicate_any pred psize nelem >>= fun any -> + M.choiceT + any + (read_reg_scalable false src ii >>= fun src -> + let copy orig cur_val idx = + get_predicate_last pred psize idx >>= fun last -> + M.choiceT + last + (scalable_getlane cur_val idx esize) + (scalable_getlane orig idx esize) + >>= fun v -> + scalable_setlane cur_val idx esize v + in + let rec reduce orig n op = + match n with + | 0 -> op >>= fun old_val -> copy orig old_val n + | _ -> reduce orig (n-1) (op >>= fun old_val -> copy orig old_val n) + in + reduce orig (nelem-1) (M.unitT src)) + (M.unitT orig) >>= fun v -> + write_reg_scalable dst v ii + + let neg dst pg src ii = + let nelem = predicate_nelem src in + let psize = predicate_psize src in + let esize = scalable_esize dst in + read_reg_scalable false dst ii >>| + read_reg_predicate false pg ii >>= fun (orig,pred) -> + get_predicate_any pred psize nelem >>= fun any -> + M.choiceT + any + (read_reg_scalable false src ii >>= fun src -> + let negate orig cur_val idx = + get_predicate_last pred psize idx >>= fun last -> + M.choiceT + last + (scalable_getlane cur_val idx esize >>= fun v -> + M.op Op.Sub V.zero v) + (scalable_getlane orig idx esize) + >>= fun v -> + scalable_setlane cur_val idx esize v + in + let rec reduce orig n op = + match n with + | 0 -> op >>= fun old_val -> negate orig old_val n + | _ -> reduce orig (n-1) (op >>= fun old_val -> negate orig old_val n) + in + reduce orig (nelem-1) (M.unitT src)) + (M.unitT orig) >>= fun v -> + write_reg_scalable dst v ii + let while_op compare unsigned p var r1 r2 ii = let psize = predicate_psize p in let nelem = predicate_nelem p in @@ -3182,6 +3246,12 @@ module Make | I_UADDV(var,v,p,z) -> check_sve inst; !(uaddv var v p z ii) + | I_MOVPRFX(r1,pg,r2) -> + check_sve inst; + !(movprfx r1 pg r2 ii) + | I_NEG_SV(r1,pg,r2) -> + check_sve inst; + !(neg r1 pg r2 ii) | I_MOV_SV(r,k,shift) -> check_sve inst; !(mov_sv r k shift ii) diff --git a/herd/tests/instructions/AArch64.sve/V19.litmus b/herd/tests/instructions/AArch64.sve/V19.litmus new file mode 100644 index 000000000..a3b87cf9c --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V19.litmus @@ -0,0 +1,9 @@ +AArch64 V19 +{} +P0 ; +MOV Z0.S,#1 ; +MOV W0,#2 ; +WHILELT P0.S, WZR,W0 ; +MOVPRFX Z1.S,P0/Z,Z0.S ; + +forall 0:V1.4S={1,1,0,0} diff --git a/herd/tests/instructions/AArch64.sve/V19.litmus.expected b/herd/tests/instructions/AArch64.sve/V19.litmus.expected new file mode 100644 index 000000000..92d5c149f --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V19.litmus.expected @@ -0,0 +1,11 @@ +Test V19 Required +States 1 +0:V1.4S={1,1,0,0}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V1.4S={1,1,0,0}) +Observation V19 Always 1 0 +Hash=6e4fe0c6b9f99be6b7106ab39771a536 + diff --git a/herd/tests/instructions/AArch64.sve/V20.litmus b/herd/tests/instructions/AArch64.sve/V20.litmus new file mode 100644 index 000000000..6bfcac838 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V20.litmus @@ -0,0 +1,10 @@ +AArch64 V20 +{} +P0 ; +MOV Z0.S,#1 ; +MOV Z1.S,#2 ; +MOV W0,#3 ; +WHILELT P0.S, WZR,W0 ; +MOVPRFX Z1.S,P0/M,Z0.S ; + +forall 0:V1.4S={1,1,1,2} diff --git a/herd/tests/instructions/AArch64.sve/V20.litmus.expected b/herd/tests/instructions/AArch64.sve/V20.litmus.expected new file mode 100644 index 000000000..219bac023 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V20.litmus.expected @@ -0,0 +1,11 @@ +Test V20 Required +States 1 +0:V1.4S={1,1,1,2}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V1.4S={1,1,1,2}) +Observation V20 Always 1 0 +Hash=677c8c841b77fff00fb418c8756a406c + diff --git a/herd/tests/instructions/AArch64.sve/V21.litmus b/herd/tests/instructions/AArch64.sve/V21.litmus new file mode 100644 index 000000000..7776b457b --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V21.litmus @@ -0,0 +1,10 @@ +AArch64 V21 +{} +P0 ; +MOV Z0.S,#-1 ; +MOV Z1.S,#2 ; +MOV W0,#3 ; +WHILELT P0.S,WZR,W0 ; +NEG Z1.S,P0/M,Z0.S ; + +forall 0:V1.4S={1,1,1,2} diff --git a/herd/tests/instructions/AArch64.sve/V21.litmus.expected b/herd/tests/instructions/AArch64.sve/V21.litmus.expected new file mode 100644 index 000000000..b93a58885 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V21.litmus.expected @@ -0,0 +1,11 @@ +Test V21 Required +States 1 +0:V1.4S={1,1,1,2}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V1.4S={1,1,1,2}) +Observation V21 Always 1 0 +Hash=eb7c6bad69d5d023bf8f5a005309ed49 + diff --git a/jingle/AArch64Arch_jingle.ml b/jingle/AArch64Arch_jingle.ml index 73b1bfc36..76bdba55f 100644 --- a/jingle/AArch64Arch_jingle.ml +++ b/jingle/AArch64Arch_jingle.ml @@ -667,7 +667,7 @@ include Arch.MakeArch(struct -> Warn.fatal "Neon instructions are not implemented yet" (* Scalable Vector Extension *) | I_WHILELT _ | I_WHILELE _ | I_WHILELO _ | I_WHILELS _ - | I_UADDV _ | I_DUP_SV _ | I_ADD_SV _ + | I_UADDV _ | I_DUP_SV _ | I_ADD_SV _ | I_NEG_SV _ | I_MOVPRFX _ | I_LD1SP _ | I_LD2SP _ | I_LD3SP _ | I_LD4SP _ | I_ST1SP _ | I_ST2SP _ | I_ST3SP _ | I_ST4SP _ | I_MOV_SV _ diff --git a/lib/AArch64Base.ml b/lib/AArch64Base.ml index 9e64965a9..abdf1d937 100644 --- a/lib/AArch64Base.ml +++ b/lib/AArch64Base.ml @@ -1449,6 +1449,10 @@ type 'k kinstruction = | I_DUP_SV of reg * variant * reg (* ADD ., ., . *) | I_ADD_SV of reg * reg * reg + (* NEG ., /M, . *) + | I_NEG_SV of reg * reg * reg + (* MOVPRFX ., /, . *) + | I_MOVPRFX of reg * reg * reg (* INDEX ., , # *) | I_INDEX_SI of reg * variant * reg * 'k (* INDEX ., #, *) @@ -1951,6 +1955,10 @@ let do_pp_instruction m = sprintf "DUP %s,%s" (pp_zreg r1) (pp_vreg v r2) | I_ADD_SV (r1,r2,r3) -> sprintf "ADD %s,%s,%s" (pp_zreg r1) (pp_zreg r2) (pp_zreg r3) + | I_NEG_SV (r1,r2,r3) -> + sprintf "NEG %s,%s,%s" (pp_zreg r1) (pp_preg r2) (pp_zreg r3) + | I_MOVPRFX (r1,r2,r3) -> + sprintf "MOVPRFX %s,%s,%s" (pp_zreg r1) (pp_preg r2) (pp_zreg r3) | I_INDEX_SI (r1,v,r2,k) -> sprintf "INDEX %s,%s,%s" (pp_zreg r1) (pp_vreg v r2) (m.pp_k k) | I_INDEX_IS (r1,v,k,r2) -> @@ -2282,7 +2290,7 @@ let fold_regs (f_regs,f_sregs) = | I_LDXP (_,_,r1,r2,r3) | I_WHILELT (r1,_,r2,r3) | I_WHILELE (r1,_,r2,r3) | I_WHILELO (r1,_,r2,r3) | I_WHILELS (r1,_,r2,r3) | I_INDEX_SS(r1,_,r2,r3) - | I_UADDV (_,r1,r2,r3) | I_ADD_SV (r1,r2,r3) + | I_UADDV (_,r1,r2,r3) | I_ADD_SV (r1,r2,r3) | I_NEG_SV (r1,r2,r3) | I_MOVPRFX (r1,r2,r3) -> fold_reg r1 (fold_reg r2 (fold_reg r3 c)) | I_LDP (_,_,r1,r2,r3,_) | I_LDPSW (r1,r2,r3,_) @@ -2516,6 +2524,10 @@ let map_regs f_reg f_symb = I_DUP_SV (map_reg r1,v,map_reg r2) | I_ADD_SV (r1,r2,r3) -> I_ADD_SV (map_reg r1,map_reg r2,map_reg r3) + | I_NEG_SV (r1,r2,r3) -> + I_NEG_SV (map_reg r1,map_reg r2,map_reg r3) + | I_MOVPRFX (r1,r2,r3) -> + I_MOVPRFX (map_reg r1,map_reg r2,map_reg r3) | I_INDEX_SI (r1,v,r2,k) -> I_INDEX_SI (map_reg r1,v,map_reg r2,k) | I_INDEX_IS (r1,v,k,r2) -> @@ -2742,7 +2754,7 @@ let get_next = | I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _ | I_LD1SP _ | I_LD2SP _ | I_LD3SP _ | I_LD4SP _ | I_ST1SP _ | I_ST2SP _ | I_ST3SP _ | I_ST4SP _ - | I_MOV_SV _ | I_ADD_SV _ + | I_MOV_SV _ | I_ADD_SV _ | I_NEG_SV _ | I_MOVPRFX _ -> [Label.Next;] (* Check instruction validity, beyond parsing *) @@ -2978,7 +2990,7 @@ module PseudoI = struct | I_MOPL _ | I_WHILELT _ | I_WHILELE _ | I_WHILELO _| I_WHILELS _ | I_UADDV _ | I_DUP_SV _ - | I_ADD_SV _ | I_INDEX_SS _ + | I_ADD_SV _ | I_INDEX_SS _ | I_NEG_SV _ | I_MOVPRFX _ as keep -> keep | I_LDR (v,r1,r2,idx) -> I_LDR (v,r1,r2,ext_tr idx) | I_LDRSW (r1,r2,idx) -> I_LDRSW (r1,r2,ext_tr idx) @@ -3148,7 +3160,7 @@ module PseudoI = struct | I_EOR_SIMD _ | I_ADD_SIMD _ | I_ADD_SIMD_S _ | I_UDF _ | I_WHILELT _ | I_WHILELE _ | I_WHILELO _ | I_WHILELS _ - | I_ADD_SV _ | I_UADDV _ | I_DUP_SV _ + | I_ADD_SV _ | I_UADDV _ | I_DUP_SV _| I_NEG_SV _ | I_MOVPRFX _ | I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _ | I_MOV_SV _ -> 0 diff --git a/lib/AArch64Lexer.mll b/lib/AArch64Lexer.mll index 5f0addac2..0b7275e22 100644 --- a/lib/AArch64Lexer.mll +++ b/lib/AArch64Lexer.mll @@ -186,6 +186,7 @@ match name with | "index" | "INDEX" -> TOK_INDEX | "mul" | "MUL" -> TOK_MUL | "vl" | "VL" -> TOK_VL +| "movprfx" | "MOVPRFX" -> MOVPRFX (* Compare and swap *) | "cas"|"CAS" -> CAS | "casa"|"CASA" -> CASA diff --git a/lib/AArch64Parser.mly b/lib/AArch64Parser.mly index 0e4ef1ad5..e40eaaf9d 100644 --- a/lib/AArch64Parser.mly +++ b/lib/AArch64Parser.mly @@ -108,7 +108,7 @@ let check_op3 op e = %token STOPBH %token UDF /* Scalable Vector Extention */ -%token WHILELT WHILELE WHILELO WHILELS UADDV TOK_INDEX TOK_MUL TOK_VL +%token WHILELT WHILELE WHILELO WHILELS UADDV TOK_INDEX TOK_MUL TOK_VL MOVPRFX %token LD1B LD1H LD1W LD1D LD2B LD2H LD2W LD2D LD3B LD3H LD3W LD3D LD4B LD4H LD4W LD4D %token ST1B ST1H ST1W ST1D ST2B ST2H ST2W ST2D ST3B ST3H ST3W ST3D ST4B ST4H ST4W ST4D /* @@ -1175,6 +1175,10 @@ instr: | _ -> assert false } | TOK_INDEX zreg COMMA k COMMA k { I_INDEX_II ($2,$4,$6) } +| TOK_NEG zreg COMMA pmreg_m COMMA zreg + { I_NEG_SV ($2,$4,$6) } +| MOVPRFX zreg COMMA pmreg COMMA zreg + { I_MOVPRFX ($2,$4,$6) } /* Compare and swap */ | CAS wreg COMMA wreg COMMA LBRK cxreg zeroopt RBRK { I_CAS (V32,RMW_P,$2,$4,$7) } diff --git a/litmus/AArch64Compile_litmus.ml b/litmus/AArch64Compile_litmus.ml index 43cd0f18a..50fca8467 100644 --- a/litmus/AArch64Compile_litmus.ml +++ b/litmus/AArch64Compile_litmus.ml @@ -73,6 +73,9 @@ module Make(V:Constant.S)(C:Config) = | I_DUP_SV (r,_,_) | I_MOV_SV (r,_,_) | I_INDEX_SI (r,_,_,_) | I_INDEX_IS (r,_,_,_) | I_INDEX_SS (r,_,_,_) | I_INDEX_II (r,_,_) + (* Accept P/M register so assume partial update of register *) + | I_NEG_SV (r,_,_) + | I_MOVPRFX (r,_,_) -> A.RegSet.of_list [r] | _ -> A.RegSet.empty @@ -1227,6 +1230,20 @@ module Make(V:Constant.S)(C:Config) = outputs = [r1]; reg_env = (add_svint32_t [r1;])} + let neg_sv r1 pg r3 = + { empty_ins with + memo = sprintf "neg %s,%s,%s" (print_zreg "o" 0 0 r1) (print_preg "i" 0 1 pg) (print_zreg "i" 0 2 r3); + inputs = [r1;pg;r3]; (* r1 is intentionally in 'inputs', see comment for reg_class_stable *) + outputs = [r1]; + reg_env = (add_svint32_t [r1;r3;])@(add_svbool_t [pg;])} + + let movprfx r1 pg r3 = + { empty_ins with + memo = sprintf "movprfx %s,%s,%s" (print_zreg "o" 0 0 r1) (print_preg "i" 0 1 pg) (print_zreg "i" 0 2 r3); + inputs = [r1;pg;r3]; (* r1 is intentionally in 'inputs', see comment for reg_class_stable *) + outputs = [r1]; + reg_env = (add_svint32_t [r1;r3;])@(add_svbool_t [pg;])} + (* Compare and swap *) let cas_memo rmw = Misc.lowercase (cas_memo rmw) @@ -1662,6 +1679,8 @@ module Make(V:Constant.S)(C:Config) = | I_INDEX_IS (r1,v,kr,r2) -> index_is r1 v kr r2::k | I_INDEX_SS (r1,v,r2,r3) -> index_ss r1 v r2 r3::k | I_INDEX_II (r1,k1,k2) -> index_ii r1 k1 k2::k + | I_NEG_SV (r1,r2,r3) -> neg_sv r1 r2 r3::k + | I_MOVPRFX (r1,r2,r3) -> movprfx r1 r2 r3::k (* Arithmetic *) | I_MOV (v,r,K i) -> mov_const v r i::k | I_MOV (v,r1,RV (_,r2)) -> movr v r1 r2::k