Skip to content

Commit

Permalink
[all]: Add SVE predicated NEG and MOVPRFX instructions
Browse files Browse the repository at this point in the history
Note that both instructions accept P/M (aka merge predicate) which
means that destination register can be updated partially thus we mark
destination register both input and output in `litmus`

Signed-off-by: Vladimir Murzin <[email protected]>
  • Loading branch information
Vladimir Murzin committed Apr 16, 2024
1 parent 39967df commit 3f67773
Show file tree
Hide file tree
Showing 13 changed files with 178 additions and 10 deletions.
8 changes: 4 additions & 4 deletions herd/AArch64Arch_herd.ml
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) =
| I_ST1SP _ | I_ST2SP _ | I_ST3SP _ | I_ST4SP _
| I_MOV_SV _
| I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _
| I_DUP_SV _ | I_ADD_SV _
| I_DUP_SV _ | I_ADD_SV _ | I_NEG_SV _ | I_MOVPRFX _
-> true

let is_cmodx_restricted_value =
Expand Down Expand Up @@ -300,7 +300,7 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) =
| I_UDF _ | I_ADDSUBEXT _ | I_MOPL _
| I_WHILELT _ | I_WHILELE _ | I_WHILELO _ | I_WHILELS _
| I_UADDV _
| I_MOV_SV _ | I_DUP_SV _ | I_ADD_SV _
| I_MOV_SV _ | I_DUP_SV _ | I_ADD_SV _ | I_NEG_SV _ | I_MOVPRFX _
| I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _
-> None

Expand Down Expand Up @@ -369,7 +369,7 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) =
| I_WHILELT (r,_,_,_) | I_WHILELE (r,_,_,_) | I_WHILELO (r,_,_,_) | I_WHILELS (r,_,_,_)
| I_UADDV (_,r,_,_)
| I_MOV_SV (r,_,_)
| I_DUP_SV (r,_,_) | I_ADD_SV (r,_,_)
| I_DUP_SV (r,_,_) | I_ADD_SV (r,_,_) | I_NEG_SV (r,_,_) | I_MOVPRFX (r,_,_)
| I_INDEX_SI (r,_,_,_) | I_INDEX_IS (r,_,_,_) | I_INDEX_SS (r,_,_,_) | I_INDEX_II (r,_,_)
-> [r]
| I_MSR (sr,_)
Expand Down Expand Up @@ -445,7 +445,7 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) =
| I_UADDV _
| I_LD1SP _ | I_LD2SP _ | I_LD3SP _ | I_LD4SP _
| I_ST1SP _ | I_ST2SP _ | I_ST3SP _ | I_ST4SP _
| I_ADD_SV _
| I_ADD_SV _ | I_NEG_SV _ | I_MOVPRFX _
| I_MOV_SV _ | I_DUP_SV _
| I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _
-> MachSize.No
Expand Down
70 changes: 70 additions & 0 deletions herd/AArch64Sem.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2184,6 +2184,70 @@ module Make
let sz = AArch64Base.tr_simd_variant var in
write_reg_neon_sz sz r1 v ii

let movprfx dst pg src ii =
let nelem = predicate_nelem src in
let psize = predicate_psize src in
let esize = scalable_esize dst in
let orig = match pg with
| AArch64Base.PMreg (_,AArch64Base.Zero)
-> mzero
| AArch64Base.PMreg (_,AArch64Base.Merge)
-> read_reg_scalable false dst ii
| _ -> assert false
in
orig >>|
read_reg_predicate false pg ii >>= fun (orig,pred) ->
get_predicate_any pred psize nelem >>= fun any ->
M.choiceT
any
(read_reg_scalable false src ii >>= fun src ->
let copy orig cur_val idx =
get_predicate_last pred psize idx >>= fun last ->
M.choiceT
last
(scalable_getlane cur_val idx esize)
(scalable_getlane orig idx esize)
>>= fun v ->
scalable_setlane cur_val idx esize v
in
let rec reduce orig n op =
match n with
| 0 -> op >>= fun old_val -> copy orig old_val n
| _ -> reduce orig (n-1) (op >>= fun old_val -> copy orig old_val n)
in
reduce orig (nelem-1) (M.unitT src))
(M.unitT orig) >>= fun v ->
write_reg_scalable dst v ii

let neg dst pg src ii =
let nelem = predicate_nelem src in
let psize = predicate_psize src in
let esize = scalable_esize dst in
read_reg_scalable false dst ii >>|
read_reg_predicate false pg ii >>= fun (orig,pred) ->
get_predicate_any pred psize nelem >>= fun any ->
M.choiceT
any
(read_reg_scalable false src ii >>= fun src ->
let negate orig cur_val idx =
get_predicate_last pred psize idx >>= fun last ->
M.choiceT
last
(scalable_getlane cur_val idx esize >>= fun v ->
M.op Op.Sub V.zero v)
(scalable_getlane orig idx esize)
>>= fun v ->
scalable_setlane cur_val idx esize v
in
let rec reduce orig n op =
match n with
| 0 -> op >>= fun old_val -> negate orig old_val n
| _ -> reduce orig (n-1) (op >>= fun old_val -> negate orig old_val n)
in
reduce orig (nelem-1) (M.unitT src))
(M.unitT orig) >>= fun v ->
write_reg_scalable dst v ii

let while_op compare unsigned p var r1 r2 ii =
let psize = predicate_psize p in
let nelem = predicate_nelem p in
Expand Down Expand Up @@ -3182,6 +3246,12 @@ module Make
| I_UADDV(var,v,p,z) ->
check_sve inst;
!(uaddv var v p z ii)
| I_MOVPRFX(r1,pg,r2) ->
check_sve inst;
!(movprfx r1 pg r2 ii)
| I_NEG_SV(r1,pg,r2) ->
check_sve inst;
!(neg r1 pg r2 ii)
| I_MOV_SV(r,k,shift) ->
check_sve inst;
!(mov_sv r k shift ii)
Expand Down
9 changes: 9 additions & 0 deletions herd/tests/instructions/AArch64.sve/V19.litmus
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
AArch64 V19
{}
P0 ;
MOV Z0.S,#1 ;
MOV W0,#2 ;
WHILELT P0.S, WZR,W0 ;
MOVPRFX Z1.S,P0/Z,Z0.S ;

forall 0:V1.4S={1,1,0,0}
11 changes: 11 additions & 0 deletions herd/tests/instructions/AArch64.sve/V19.litmus.expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Test V19 Required
States 1
0:V1.4S={1,1,0,0};
Ok
Witnesses
Positive: 1 Negative: 0
Flag Scalable-Vector-Extensions-is-work-in-progress
Condition forall (0:V1.4S={1,1,0,0})
Observation V19 Always 1 0
Hash=6e4fe0c6b9f99be6b7106ab39771a536

10 changes: 10 additions & 0 deletions herd/tests/instructions/AArch64.sve/V20.litmus
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
AArch64 V20
{}
P0 ;
MOV Z0.S,#1 ;
MOV Z1.S,#2 ;
MOV W0,#3 ;
WHILELT P0.S, WZR,W0 ;
MOVPRFX Z1.S,P0/M,Z0.S ;

forall 0:V1.4S={1,1,1,2}
11 changes: 11 additions & 0 deletions herd/tests/instructions/AArch64.sve/V20.litmus.expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Test V20 Required
States 1
0:V1.4S={1,1,1,2};
Ok
Witnesses
Positive: 1 Negative: 0
Flag Scalable-Vector-Extensions-is-work-in-progress
Condition forall (0:V1.4S={1,1,1,2})
Observation V20 Always 1 0
Hash=677c8c841b77fff00fb418c8756a406c

10 changes: 10 additions & 0 deletions herd/tests/instructions/AArch64.sve/V21.litmus
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
AArch64 V21
{}
P0 ;
MOV Z0.S,#-1 ;
MOV Z1.S,#2 ;
MOV W0,#3 ;
WHILELT P0.S,WZR,W0 ;
NEG Z1.S,P0/M,Z0.S ;

forall 0:V1.4S={1,1,1,2}
11 changes: 11 additions & 0 deletions herd/tests/instructions/AArch64.sve/V21.litmus.expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Test V21 Required
States 1
0:V1.4S={1,1,1,2};
Ok
Witnesses
Positive: 1 Negative: 0
Flag Scalable-Vector-Extensions-is-work-in-progress
Condition forall (0:V1.4S={1,1,1,2})
Observation V21 Always 1 0
Hash=eb7c6bad69d5d023bf8f5a005309ed49

2 changes: 1 addition & 1 deletion jingle/AArch64Arch_jingle.ml
Original file line number Diff line number Diff line change
Expand Up @@ -667,7 +667,7 @@ include Arch.MakeArch(struct
-> Warn.fatal "Neon instructions are not implemented yet"
(* Scalable Vector Extension *)
| I_WHILELT _ | I_WHILELE _ | I_WHILELO _ | I_WHILELS _
| I_UADDV _ | I_DUP_SV _ | I_ADD_SV _
| I_UADDV _ | I_DUP_SV _ | I_ADD_SV _ | I_NEG_SV _ | I_MOVPRFX _
| I_LD1SP _ | I_LD2SP _ | I_LD3SP _ | I_LD4SP _
| I_ST1SP _ | I_ST2SP _ | I_ST3SP _ | I_ST4SP _
| I_MOV_SV _
Expand Down
20 changes: 16 additions & 4 deletions lib/AArch64Base.ml
Original file line number Diff line number Diff line change
Expand Up @@ -1449,6 +1449,10 @@ type 'k kinstruction =
| I_DUP_SV of reg * variant * reg
(* ADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> *)
| I_ADD_SV of reg * reg * reg
(* NEG <Zd>.<T>, <Pg>/M, <Zn>.<T> *)
| I_NEG_SV of reg * reg * reg
(* MOVPRFX <Zd>.<T>, <Pg>/<ZM>, <Zn>.<T> *)
| I_MOVPRFX of reg * reg * reg
(* INDEX <Zd>.<T>, <R><n>, #<imm> *)
| I_INDEX_SI of reg * variant * reg * 'k
(* INDEX <Zd>.<T>, #<imm>, <R><m> *)
Expand Down Expand Up @@ -1951,6 +1955,10 @@ let do_pp_instruction m =
sprintf "DUP %s,%s" (pp_zreg r1) (pp_vreg v r2)
| I_ADD_SV (r1,r2,r3) ->
sprintf "ADD %s,%s,%s" (pp_zreg r1) (pp_zreg r2) (pp_zreg r3)
| I_NEG_SV (r1,r2,r3) ->
sprintf "NEG %s,%s,%s" (pp_zreg r1) (pp_preg r2) (pp_zreg r3)
| I_MOVPRFX (r1,r2,r3) ->
sprintf "MOVPRFX %s,%s,%s" (pp_zreg r1) (pp_preg r2) (pp_zreg r3)
| I_INDEX_SI (r1,v,r2,k) ->
sprintf "INDEX %s,%s,%s" (pp_zreg r1) (pp_vreg v r2) (m.pp_k k)
| I_INDEX_IS (r1,v,k,r2) ->
Expand Down Expand Up @@ -2282,7 +2290,7 @@ let fold_regs (f_regs,f_sregs) =
| I_LDXP (_,_,r1,r2,r3)
| I_WHILELT (r1,_,r2,r3) | I_WHILELE (r1,_,r2,r3) | I_WHILELO (r1,_,r2,r3) | I_WHILELS (r1,_,r2,r3)
| I_INDEX_SS(r1,_,r2,r3)
| I_UADDV (_,r1,r2,r3) | I_ADD_SV (r1,r2,r3)
| I_UADDV (_,r1,r2,r3) | I_ADD_SV (r1,r2,r3) | I_NEG_SV (r1,r2,r3) | I_MOVPRFX (r1,r2,r3)
-> fold_reg r1 (fold_reg r2 (fold_reg r3 c))
| I_LDP (_,_,r1,r2,r3,_)
| I_LDPSW (r1,r2,r3,_)
Expand Down Expand Up @@ -2516,6 +2524,10 @@ let map_regs f_reg f_symb =
I_DUP_SV (map_reg r1,v,map_reg r2)
| I_ADD_SV (r1,r2,r3) ->
I_ADD_SV (map_reg r1,map_reg r2,map_reg r3)
| I_NEG_SV (r1,r2,r3) ->
I_NEG_SV (map_reg r1,map_reg r2,map_reg r3)
| I_MOVPRFX (r1,r2,r3) ->
I_MOVPRFX (map_reg r1,map_reg r2,map_reg r3)
| I_INDEX_SI (r1,v,r2,k) ->
I_INDEX_SI (map_reg r1,v,map_reg r2,k)
| I_INDEX_IS (r1,v,k,r2) ->
Expand Down Expand Up @@ -2742,7 +2754,7 @@ let get_next =
| I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _
| I_LD1SP _ | I_LD2SP _ | I_LD3SP _ | I_LD4SP _
| I_ST1SP _ | I_ST2SP _ | I_ST3SP _ | I_ST4SP _
| I_MOV_SV _ | I_ADD_SV _
| I_MOV_SV _ | I_ADD_SV _ | I_NEG_SV _ | I_MOVPRFX _
-> [Label.Next;]

(* Check instruction validity, beyond parsing *)
Expand Down Expand Up @@ -2978,7 +2990,7 @@ module PseudoI = struct
| I_MOPL _
| I_WHILELT _ | I_WHILELE _ | I_WHILELO _| I_WHILELS _
| I_UADDV _ | I_DUP_SV _
| I_ADD_SV _ | I_INDEX_SS _
| I_ADD_SV _ | I_INDEX_SS _ | I_NEG_SV _ | I_MOVPRFX _
as keep -> keep
| I_LDR (v,r1,r2,idx) -> I_LDR (v,r1,r2,ext_tr idx)
| I_LDRSW (r1,r2,idx) -> I_LDRSW (r1,r2,ext_tr idx)
Expand Down Expand Up @@ -3148,7 +3160,7 @@ module PseudoI = struct
| I_EOR_SIMD _ | I_ADD_SIMD _ | I_ADD_SIMD_S _
| I_UDF _
| I_WHILELT _ | I_WHILELE _ | I_WHILELO _ | I_WHILELS _
| I_ADD_SV _ | I_UADDV _ | I_DUP_SV _
| I_ADD_SV _ | I_UADDV _ | I_DUP_SV _| I_NEG_SV _ | I_MOVPRFX _
| I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _
| I_MOV_SV _
-> 0
Expand Down
1 change: 1 addition & 0 deletions lib/AArch64Lexer.mll
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ match name with
| "index" | "INDEX" -> TOK_INDEX
| "mul" | "MUL" -> TOK_MUL
| "vl" | "VL" -> TOK_VL
| "movprfx" | "MOVPRFX" -> MOVPRFX
(* Compare and swap *)
| "cas"|"CAS" -> CAS
| "casa"|"CASA" -> CASA
Expand Down
6 changes: 5 additions & 1 deletion lib/AArch64Parser.mly
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ let check_op3 op e =
%token <AArch64Base.bh * AArch64Base.atomic_op * AArch64Base.w_type> STOPBH
%token UDF
/* Scalable Vector Extention */
%token WHILELT WHILELE WHILELO WHILELS UADDV TOK_INDEX TOK_MUL TOK_VL
%token WHILELT WHILELE WHILELO WHILELS UADDV TOK_INDEX TOK_MUL TOK_VL MOVPRFX
%token LD1B LD1H LD1W LD1D LD2B LD2H LD2W LD2D LD3B LD3H LD3W LD3D LD4B LD4H LD4W LD4D
%token ST1B ST1H ST1W ST1D ST2B ST2H ST2W ST2D ST3B ST3H ST3W ST3D ST4B ST4H ST4W ST4D
/*
Expand Down Expand Up @@ -1175,6 +1175,10 @@ instr:
| _ -> assert false }
| TOK_INDEX zreg COMMA k COMMA k
{ I_INDEX_II ($2,$4,$6) }
| TOK_NEG zreg COMMA pmreg_m COMMA zreg
{ I_NEG_SV ($2,$4,$6) }
| MOVPRFX zreg COMMA pmreg COMMA zreg
{ I_MOVPRFX ($2,$4,$6) }
/* Compare and swap */
| CAS wreg COMMA wreg COMMA LBRK cxreg zeroopt RBRK
{ I_CAS (V32,RMW_P,$2,$4,$7) }
Expand Down
19 changes: 19 additions & 0 deletions litmus/AArch64Compile_litmus.ml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ module Make(V:Constant.S)(C:Config) =
| I_DUP_SV (r,_,_)
| I_MOV_SV (r,_,_)
| I_INDEX_SI (r,_,_,_) | I_INDEX_IS (r,_,_,_) | I_INDEX_SS (r,_,_,_) | I_INDEX_II (r,_,_)
(* Accept P/M register so assume partial update of register *)
| I_NEG_SV (r,_,_)
| I_MOVPRFX (r,_,_)
-> A.RegSet.of_list [r]
| _ -> A.RegSet.empty

Expand Down Expand Up @@ -1227,6 +1230,20 @@ module Make(V:Constant.S)(C:Config) =
outputs = [r1];
reg_env = (add_svint32_t [r1;])}

let neg_sv r1 pg r3 =
{ empty_ins with
memo = sprintf "neg %s,%s,%s" (print_zreg "o" 0 0 r1) (print_preg "i" 0 1 pg) (print_zreg "i" 0 2 r3);
inputs = [r1;pg;r3]; (* r1 is intentionally in 'inputs', see comment for reg_class_stable *)
outputs = [r1];
reg_env = (add_svint32_t [r1;r3;])@(add_svbool_t [pg;])}

let movprfx r1 pg r3 =
{ empty_ins with
memo = sprintf "movprfx %s,%s,%s" (print_zreg "o" 0 0 r1) (print_preg "i" 0 1 pg) (print_zreg "i" 0 2 r3);
inputs = [r1;pg;r3]; (* r1 is intentionally in 'inputs', see comment for reg_class_stable *)
outputs = [r1];
reg_env = (add_svint32_t [r1;r3;])@(add_svbool_t [pg;])}

(* Compare and swap *)

let cas_memo rmw = Misc.lowercase (cas_memo rmw)
Expand Down Expand Up @@ -1662,6 +1679,8 @@ module Make(V:Constant.S)(C:Config) =
| I_INDEX_IS (r1,v,kr,r2) -> index_is r1 v kr r2::k
| I_INDEX_SS (r1,v,r2,r3) -> index_ss r1 v r2 r3::k
| I_INDEX_II (r1,k1,k2) -> index_ii r1 k1 k2::k
| I_NEG_SV (r1,r2,r3) -> neg_sv r1 r2 r3::k
| I_MOVPRFX (r1,r2,r3) -> movprfx r1 r2 r3::k
(* Arithmetic *)
| I_MOV (v,r,K i) -> mov_const v r i::k
| I_MOV (v,r1,RV (_,r2)) -> movr v r1 r2::k
Expand Down

0 comments on commit 3f67773

Please sign in to comment.