Skip to content

Commit

Permalink
[all]: Add SVE predicated NEG and MOVPRFX instructions
Browse files Browse the repository at this point in the history
Note that both instructions accept P/M (aka merge predicate) which
means that destination register can be updated partially thus we mark
destination register both input and output in `litmus`

Signed-off-by: Vladimir Murzin <[email protected]>
  • Loading branch information
Vladimir Murzin committed May 1, 2024
1 parent 4128e86 commit 5092711
Show file tree
Hide file tree
Showing 13 changed files with 180 additions and 3 deletions.
4 changes: 4 additions & 0 deletions herd/AArch64Arch_herd.ml
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) =
| I_MOV_SV _
| I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _
| I_DUP_SV _ | I_ADD_SV _ | I_PTRUE _
| I_NEG_SV _ | I_MOVPRFX _
-> true

let is_cmodx_restricted_value =
Expand Down Expand Up @@ -301,6 +302,7 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) =
| I_WHILELT _ | I_WHILELE _ | I_WHILELO _ | I_WHILELS _
| I_UADDV _
| I_MOV_SV _ | I_DUP_SV _ | I_ADD_SV _ | I_PTRUE _
| I_NEG_SV _ | I_MOVPRFX _
| I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _
-> None

Expand Down Expand Up @@ -370,6 +372,7 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) =
| I_UADDV (_,r,_,_)
| I_MOV_SV (r,_,_)
| I_DUP_SV (r,_,_) | I_ADD_SV (r,_,_) | I_PTRUE (r,_)
| I_NEG_SV (r,_,_) | I_MOVPRFX (r,_,_)
| I_INDEX_SI (r,_,_,_) | I_INDEX_IS (r,_,_,_) | I_INDEX_SS (r,_,_,_) | I_INDEX_II (r,_,_)
-> [r]
| I_MSR (sr,_)
Expand Down Expand Up @@ -446,6 +449,7 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) =
| I_LD1SP _ | I_LD2SP _ | I_LD3SP _ | I_LD4SP _
| I_ST1SP _ | I_ST2SP _ | I_ST3SP _ | I_ST4SP _
| I_ADD_SV _ | I_PTRUE _
| I_NEG_SV _ | I_MOVPRFX _
| I_MOV_SV _ | I_DUP_SV _
| I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _
-> MachSize.No
Expand Down
70 changes: 70 additions & 0 deletions herd/AArch64Sem.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2203,6 +2203,70 @@ module Make
let sz = AArch64Base.tr_simd_variant var in
write_reg_neon_sz sz r1 v ii

let movprfx dst pg src ii =
let nelem = predicate_nelem src in
let psize = predicate_psize src in
let esize = scalable_esize dst in
let orig = match pg with
| AArch64Base.PMreg (_,AArch64Base.Zero)
-> mzero
| AArch64Base.PMreg (_,AArch64Base.Merge)
-> read_reg_scalable false dst ii
| _ -> assert false
in
orig >>|
read_reg_predicate false pg ii >>= fun (orig,pred) ->
get_predicate_any pred psize nelem >>= fun any ->
M.choiceT
any
(read_reg_scalable false src ii >>= fun src ->
let copy orig cur_val idx =
get_predicate_last pred psize idx >>= fun last ->
M.choiceT
last
(scalable_getlane cur_val idx esize)
(scalable_getlane orig idx esize)
>>= fun v ->
scalable_setlane cur_val idx esize v
in
let rec reduce orig n op =
match n with
| 0 -> op >>= fun old_val -> copy orig old_val n
| _ -> reduce orig (n-1) (op >>= fun old_val -> copy orig old_val n)
in
reduce orig (nelem-1) (M.unitT src))
(M.unitT orig) >>= fun v ->
write_reg_scalable dst v ii

let neg dst pg src ii =
let nelem = predicate_nelem src in
let psize = predicate_psize src in
let esize = scalable_esize dst in
read_reg_scalable false dst ii >>|
read_reg_predicate false pg ii >>= fun (orig,pred) ->
get_predicate_any pred psize nelem >>= fun any ->
M.choiceT
any
(read_reg_scalable false src ii >>= fun src ->
let negate orig cur_val idx =
get_predicate_last pred psize idx >>= fun last ->
M.choiceT
last
(scalable_getlane cur_val idx esize >>= fun v ->
M.op Op.Sub V.zero v)
(scalable_getlane orig idx esize)
>>= fun v ->
scalable_setlane cur_val idx esize v
in
let rec reduce orig n op =
match n with
| 0 -> op >>= fun old_val -> negate orig old_val n
| _ -> reduce orig (n-1) (op >>= fun old_val -> negate orig old_val n)
in
reduce orig (nelem-1) (M.unitT src))
(M.unitT orig) >>= fun v ->
write_reg_scalable dst v ii

let while_op compare unsigned p var r1 r2 ii =
let psize = predicate_psize p in
let nelem = predicate_nelem p in
Expand Down Expand Up @@ -3223,6 +3287,12 @@ module Make
| I_UADDV(var,v,p,z) ->
check_sve inst;
!(uaddv var v p z ii)
| I_MOVPRFX(r1,pg,r2) ->
check_sve inst;
!(movprfx r1 pg r2 ii)
| I_NEG_SV(r1,pg,r2) ->
check_sve inst;
!(neg r1 pg r2 ii)
| I_MOV_SV(r,k,shift) ->
check_sve inst;
!(mov_sv r k shift ii)
Expand Down
11 changes: 11 additions & 0 deletions herd/tests/instructions/AArch64.sve/V19.litmus
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
AArch64 V19
{}
P0 ;
MOV Z0.S,#1 ;
MOV Z1.S,#2 ;
MOV Z2.S,#-1 ;
PTRUE P0.S,VL2 ;
MOVPRFX Z1.S,P0/Z,Z0.S ;
NEG Z1.S,P0/M,Z2.S ;

forall 0:V1.4S={1,1,0,0}
11 changes: 11 additions & 0 deletions herd/tests/instructions/AArch64.sve/V19.litmus.expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Test V19 Required
States 1
0:V1.4S={1,1,0,0};
Ok
Witnesses
Positive: 1 Negative: 0
Flag Scalable-Vector-Extensions-is-work-in-progress
Condition forall (0:V1.4S={1,1,0,0})
Observation V19 Always 1 0
Hash=2a7dcf8717c679858b3d9b0347124aca

11 changes: 11 additions & 0 deletions herd/tests/instructions/AArch64.sve/V20.litmus
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
AArch64 V20
{}
P0 ;
MOV Z0.S,#1 ;
MOV Z1.S,#2 ;
MOV Z2.S,#-1 ;
PTRUE P0.S,VL3 ;
MOVPRFX Z1.S,P0/M,Z0.S ;
NEG Z1.S,P0/M,Z2.S ;

forall 0:V1.4S={1,1,1,2}
11 changes: 11 additions & 0 deletions herd/tests/instructions/AArch64.sve/V20.litmus.expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Test V20 Required
States 1
0:V1.4S={1,1,1,2};
Ok
Witnesses
Positive: 1 Negative: 0
Flag Scalable-Vector-Extensions-is-work-in-progress
Condition forall (0:V1.4S={1,1,1,2})
Observation V20 Always 1 0
Hash=f5073ff7daa0f2b6acb6dea49f4fc61a

9 changes: 9 additions & 0 deletions herd/tests/instructions/AArch64.sve/V21.litmus
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
AArch64 V21
{}
P0 ;
MOV Z0.S,#-1 ;
MOV Z1.S,#2 ;
PTRUE P0.S,VL3 ;
NEG Z1.S,P0/M,Z0.S ;

forall 0:V1.4S={1,1,1,2}
11 changes: 11 additions & 0 deletions herd/tests/instructions/AArch64.sve/V21.litmus.expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Test V21 Required
States 1
0:V1.4S={1,1,1,2};
Ok
Witnesses
Positive: 1 Negative: 0
Flag Scalable-Vector-Extensions-is-work-in-progress
Condition forall (0:V1.4S={1,1,1,2})
Observation V21 Always 1 0
Hash=b2c63565af19e50740733f57fc3bf64a

2 changes: 1 addition & 1 deletion jingle/AArch64Arch_jingle.ml
Original file line number Diff line number Diff line change
Expand Up @@ -667,7 +667,7 @@ include Arch.MakeArch(struct
-> Warn.fatal "Neon instructions are not implemented yet"
(* Scalable Vector Extension *)
| I_WHILELT _ | I_WHILELE _ | I_WHILELO _ | I_WHILELS _
| I_UADDV _ | I_DUP_SV _ | I_ADD_SV _
| I_UADDV _ | I_DUP_SV _ | I_ADD_SV _ | I_NEG_SV _ | I_MOVPRFX _
| I_LD1SP _ | I_LD2SP _ | I_LD3SP _ | I_LD4SP _
| I_ST1SP _ | I_ST2SP _ | I_ST3SP _ | I_ST4SP _
| I_MOV_SV _ | I_PTRUE _
Expand Down
18 changes: 16 additions & 2 deletions lib/AArch64Base.ml
Original file line number Diff line number Diff line change
Expand Up @@ -1471,6 +1471,10 @@ type 'k kinstruction =
| I_DUP_SV of reg * variant * reg
(* ADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> *)
| I_ADD_SV of reg * reg * reg
(* NEG <Zd>.<T>, <Pg>/M, <Zn>.<T> *)
| I_NEG_SV of reg * reg * reg
(* MOVPRFX <Zd>.<T>, <Pg>/<ZM>, <Zn>.<T> *)
| I_MOVPRFX of reg * reg * reg
(* INDEX <Zd>.<T>, <R><n>, #<imm> *)
| I_INDEX_SI of reg * variant * reg * 'k
(* INDEX <Zd>.<T>, #<imm>, <R><m> *)
Expand Down Expand Up @@ -1994,6 +1998,10 @@ let do_pp_instruction m =
sprintf "DUP %s,%s" (pp_zreg r1) (pp_vreg v r2)
| I_ADD_SV (r1,r2,r3) ->
sprintf "ADD %s,%s,%s" (pp_zreg r1) (pp_zreg r2) (pp_zreg r3)
| I_NEG_SV (r1,r2,r3) ->
sprintf "NEG %s,%s,%s" (pp_zreg r1) (pp_preg r2) (pp_zreg r3)
| I_MOVPRFX (r1,r2,r3) ->
sprintf "MOVPRFX %s,%s,%s" (pp_zreg r1) (pp_preg r2) (pp_zreg r3)
| I_INDEX_SI (r1,v,r2,k) ->
sprintf "INDEX %s,%s,%s" (pp_zreg r1) (pp_vreg v r2) (m.pp_k k)
| I_INDEX_IS (r1,v,k,r2) ->
Expand Down Expand Up @@ -2326,7 +2334,7 @@ let fold_regs (f_regs,f_sregs) =
| I_LDXP (_,_,r1,r2,r3)
| I_WHILELT (r1,_,r2,r3) | I_WHILELE (r1,_,r2,r3) | I_WHILELO (r1,_,r2,r3) | I_WHILELS (r1,_,r2,r3)
| I_INDEX_SS(r1,_,r2,r3)
| I_UADDV (_,r1,r2,r3) | I_ADD_SV (r1,r2,r3)
| I_UADDV (_,r1,r2,r3) | I_ADD_SV (r1,r2,r3) | I_NEG_SV (r1,r2,r3) | I_MOVPRFX (r1,r2,r3)
-> fold_reg r1 (fold_reg r2 (fold_reg r3 c))
| I_LDP (_,_,r1,r2,r3,_)
| I_LDPSW (r1,r2,r3,_)
Expand Down Expand Up @@ -2562,6 +2570,10 @@ let map_regs f_reg f_symb =
I_DUP_SV (map_reg r1,v,map_reg r2)
| I_ADD_SV (r1,r2,r3) ->
I_ADD_SV (map_reg r1,map_reg r2,map_reg r3)
| I_NEG_SV (r1,r2,r3) ->
I_NEG_SV (map_reg r1,map_reg r2,map_reg r3)
| I_MOVPRFX (r1,r2,r3) ->
I_MOVPRFX (map_reg r1,map_reg r2,map_reg r3)
| I_INDEX_SI (r1,v,r2,k) ->
I_INDEX_SI (map_reg r1,v,map_reg r2,k)
| I_INDEX_IS (r1,v,k,r2) ->
Expand Down Expand Up @@ -2788,7 +2800,7 @@ let get_next =
| I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _
| I_LD1SP _ | I_LD2SP _ | I_LD3SP _ | I_LD4SP _
| I_ST1SP _ | I_ST2SP _ | I_ST3SP _ | I_ST4SP _
| I_MOV_SV _ | I_ADD_SV _
| I_MOV_SV _ | I_ADD_SV _ | I_NEG_SV _ | I_MOVPRFX _
-> [Label.Next;]

(* Check instruction validity, beyond parsing *)
Expand Down Expand Up @@ -3025,6 +3037,7 @@ module PseudoI = struct
| I_WHILELT _ | I_WHILELE _ | I_WHILELO _| I_WHILELS _
| I_UADDV _ | I_DUP_SV _ | I_PTRUE _
| I_ADD_SV _ | I_INDEX_SS _
| I_NEG_SV _ | I_MOVPRFX _
as keep -> keep
| I_LDR (v,r1,r2,idx) -> I_LDR (v,r1,r2,ext_tr idx)
| I_LDRSW (r1,r2,idx) -> I_LDRSW (r1,r2,ext_tr idx)
Expand Down Expand Up @@ -3196,6 +3209,7 @@ module PseudoI = struct
| I_WHILELT _ | I_WHILELE _ | I_WHILELO _ | I_WHILELS _
| I_PTRUE _
| I_ADD_SV _ | I_UADDV _ | I_DUP_SV _
| I_NEG_SV _ | I_MOVPRFX _
| I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _
| I_MOV_SV _
-> 0
Expand Down
1 change: 1 addition & 0 deletions lib/AArch64Lexer.mll
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ match name with
| "mul4" | "MUL4" -> TOK_MUL4
| "mul3" | "MUL3" -> TOK_MUL3
| "all" | "ALL" -> TOK_ALL
| "movprfx" | "MOVPRFX" -> MOVPRFX
(* Compare and swap *)
| "cas"|"CAS" -> CAS
| "casa"|"CASA" -> CASA
Expand Down
5 changes: 5 additions & 0 deletions lib/AArch64Parser.mly
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ let check_op3 op e =
%token TOK_ALL TOK_POW2 TOK_MUL3 TOK_MUL4
%token TOK_VL1 TOK_VL2 TOK_VL3 TOK_VL4 TOK_VL5 TOK_VL6 TOK_VL7 TOK_VL8
%token TOK_VL16 TOK_VL32 TOK_VL64 TOK_VL128 TOK_VL256
%token MOVPRFX
%token LD1B LD1H LD1W LD1D LD2B LD2H LD2W LD2D LD3B LD3H LD3W LD3D LD4B LD4H LD4W LD4D
%token ST1B ST1H ST1W ST1D ST2B ST2H ST2W ST2D ST3B ST3H ST3W ST3D ST4B ST4H ST4W ST4D
/*
Expand Down Expand Up @@ -1199,6 +1200,10 @@ instr:
{ I_INDEX_II ($2,$4,$6) }
| PTRUE preg_sz pattern
{ I_PTRUE ($2,$3) }
| TOK_NEG zreg COMMA pmreg_m COMMA zreg
{ I_NEG_SV ($2,$4,$6) }
| MOVPRFX zreg COMMA pmreg COMMA zreg
{ I_MOVPRFX ($2,$4,$6) }
/* Compare and swap */
| CAS wreg COMMA wreg COMMA LBRK cxreg zeroopt RBRK
{ I_CAS (V32,RMW_P,$2,$4,$7) }
Expand Down
19 changes: 19 additions & 0 deletions litmus/AArch64Compile_litmus.ml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ module Make(V:Constant.S)(C:Config) =
| I_DUP_SV (r,_,_)
| I_MOV_SV (r,_,_)
| I_INDEX_SI (r,_,_,_) | I_INDEX_IS (r,_,_,_) | I_INDEX_SS (r,_,_,_) | I_INDEX_II (r,_,_)
(* Accept P/M register so assume partial update of register *)
| I_NEG_SV (r,_,_)
| I_MOVPRFX (r,_,_)
-> A.RegSet.of_list [r]
| _ -> A.RegSet.empty

Expand Down Expand Up @@ -1252,6 +1255,20 @@ module Make(V:Constant.S)(C:Config) =
outputs = [pred];
reg_env = (add_svbool_t [pred;])}

let neg_sv r1 pg r3 =
{ empty_ins with
memo = sprintf "neg %s,%s,%s" (print_zreg "o" 0 0 r1) (print_preg "i" 0 1 pg) (print_zreg "i" 0 2 r3);
inputs = [r1;pg;r3]; (* r1 is intentionally in 'inputs', see comment for reg_class_stable *)
outputs = [r1];
reg_env = (add_svint32_t [r1;r3;])@(add_svbool_t [pg;])}

let movprfx r1 pg r3 =
{ empty_ins with
memo = sprintf "movprfx %s,%s,%s" (print_zreg "o" 0 0 r1) (print_preg "i" 0 1 pg) (print_zreg "i" 0 2 r3);
inputs = [r1;pg;r3]; (* r1 is intentionally in 'inputs', see comment for reg_class_stable *)
outputs = [r1];
reg_env = (add_svint32_t [r1;r3;])@(add_svbool_t [pg;])}

(* Compare and swap *)

let cas_memo rmw = Misc.lowercase (cas_memo rmw)
Expand Down Expand Up @@ -1688,6 +1705,8 @@ module Make(V:Constant.S)(C:Config) =
| I_INDEX_SS (r1,v,r2,r3) -> index_ss r1 v r2 r3::k
| I_INDEX_II (r1,k1,k2) -> index_ii r1 k1 k2::k
| I_PTRUE (pred,pat) -> ptrue pred pat::k
| I_NEG_SV (r1,r2,r3) -> neg_sv r1 r2 r3::k
| I_MOVPRFX (r1,r2,r3) -> movprfx r1 r2 r3::k
(* Arithmetic *)
| I_MOV (v,r,K i) -> mov_const v r i::k
| I_MOV (v,r1,RV (_,r2)) -> movr v r1 r2::k
Expand Down

0 comments on commit 5092711

Please sign in to comment.