Skip to content

Commit

Permalink
[all]: Introduce basic support for SVE
Browse files Browse the repository at this point in the history
Scalable Vector Extension (SVE) is Vector Length Agnostic (VLA):

 - Vector Length (VL) is a hardware implementation choice from 128 up
   to 2048 bits.
 - New programming model allows software to scale dynamically to
   available vector length.
 - No need to define a new ISA, rewrite or recompile for new vector
   lengths.

Scalable vector registers:

 - Z0-Z31 extending NEON’s 128-bit V0-V31
 - Packed DP, SP & HP floating-point elements
 - Packed 64, 32, 16 & 8-bit integer elements

Scalable predicate registers:

 - P0-P7 governing predicates for load/store/arithmetic
 - P8-P15 additional predicates for loop management
 - FFR first fault register for software speculation

Implementation choices and known limitations:

 - SVE memory model is not addressed

 - `herd7` implements a fixed 128-bit vector length (on top of the
   existing Neon infrastructure)

 - `litmus7` uses ARM C Language Extensions (ACLE) for SVE
   + Building SVE tests requires `-ccopts "-march=armv8-a+sve -O2"`
   + Although `Z` registers overlap with `V` registers, mixing them in a
     litmus test would likely lead to a compilation failure (due to the
     difference in the ACLE types)
   + However, naming a `V` register that overlaps a `Z` register in the
     `final` clause is supported (this way we can inspect the content of
     the `Z` register)

 - The following SVE instructions are implemented:
   + PTRUE (predicate)
   + MOV (immediate, unpredicated)
   + DUP (scalar)
   + ADD (vectors, unpredicated)
   + INDEX (immediate, scalar)
   + INDEX (immediates)
   + INDEX (scalar, immediate)
   + INDEX (scalars)
   + WHILELT (predicate)
   + WHILELE (predicate)
   + WHILELO (predicate)
   + WHILELS (predicate)
   + LD1B (scalar plus immediate, single register)
   + LD1H (scalar plus immediate, single register)
   + LD1W (scalar plus immediate, single register)
   + LD1D (scalar plus immediate, single register)
   + LD1D (scalar plus scalar, single register)
   + LD1B (scalar plus scalar, single register)
   + LD1H (scalar plus scalar, single register)
   + LD1W (scalar plus scalar, single register)
   + LD1B (scalar plus vector)
   + LD1H (scalar plus vector)
   + LD1W (scalar plus vector)
   + LD1D (scalar plus vector)
   + LD2B (scalar plus immediate)
   + LD2H (scalar plus immediate)
   + LD2W (scalar plus immediate)
   + LD2D (scalar plus immediate)
   + LD2B (scalar plus scalar)
   + LD2H (scalar plus scalar)
   + LD2W (scalar plus scalar)
   + LD2D (scalar plus scalar)
   + LD3B (scalar plus immediate)
   + LD3H (scalar plus immediate)
   + LD3W (scalar plus immediate)
   + LD3D (scalar plus immediate)
   + LD3B (scalar plus scalar)
   + LD3H (scalar plus scalar)
   + LD3W (scalar plus scalar)
   + LD3D (scalar plus scalar)
   + LD4B (scalar plus immediate)
   + LD4H (scalar plus immediate)
   + LD4W (scalar plus immediate)
   + LD4D (scalar plus immediate)
   + LD4B (scalar plus scalar)
   + LD4H (scalar plus scalar)
   + LD4W (scalar plus scalar)
   + LD4D (scalar plus scalar)
   + ST1B (scalar plus immediate, single register)
   + ST1H (scalar plus immediate, single register)
   + ST1W (scalar plus immediate, single register)
   + ST1D (scalar plus immediate, single register)
   + ST1B (scalar plus scalar, single register)
   + ST1H (scalar plus scalar, single register)
   + ST1W (scalar plus scalar, single register)
   + ST1D (scalar plus scalar, single register)
   + ST1B (scalar plus vector)
   + ST1H (scalar plus vector)
   + ST1W (scalar plus vector)
   + ST1D (scalar plus vector)
   + ST2B (scalar plus immediate)
   + ST2H (scalar plus immediate)
   + ST2W (scalar plus immediate)
   + ST2D (scalar plus immediate)
   + ST2B (scalar plus scalar)
   + ST2H (scalar plus scalar)
   + ST2W (scalar plus scalar)
   + ST2D (scalar plus scalar)
   + ST3B (scalar plus immediate)
   + ST3H (scalar plus immediate)
   + ST3W (scalar plus immediate)
   + ST3D (scalar plus immediate)
   + ST3B (scalar plus scalar)
   + ST3H (scalar plus scalar)
   + ST3W (scalar plus scalar)
   + ST3D (scalar plus scalar)
   + ST4B (scalar plus immediate)
   + ST4H (scalar plus immediate)
   + ST4W (scalar plus immediate)
   + ST4D (scalar plus immediate)
   + ST4B (scalar plus scalar)
   + ST4H (scalar plus scalar)
   + ST4W (scalar plus scalar)
   + ST4D (scalar plus scalar)
   + SVE aliases for condition codes

Signed-off-by: Vladimir Murzin <[email protected]>
  • Loading branch information
Vladimir Murzin committed May 1, 2024
1 parent 9510ba4 commit b1dbf49
Show file tree
Hide file tree
Showing 55 changed files with 2,532 additions and 35 deletions.
12 changes: 12 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,18 @@ test.neon::
$(REGRESSION_TEST_MODE)
@ echo "herd7 AArch64 NEON instructions tests: OK"

# herd7 regression tests for the AArch64 SVE instructions.
# `test:: test.sve` adds this target as a prerequisite of the aggregate
# `test` target. The recipe runs $(HERD_REGRESSION_TEST) over the litmus
# tests in ./herd/tests/instructions/AArch64.sve with `-variant sve`, then
# prints the OK line.
test:: test.sve
test.sve::
@ echo
$(HERD_REGRESSION_TEST) \
-j $(J) \
-herd-path $(HERD) \
-libdir-path ./herd/libdir \
-litmus-dir ./herd/tests/instructions/AArch64.sve \
-variant sve \
$(REGRESSION_TEST_MODE)
@ echo "herd7 AArch64 SVE instructions tests: OK"

test:: test.mte
test.mte::
@ echo
Expand Down
45 changes: 45 additions & 0 deletions herd/AArch64Arch_herd.ml
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,13 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) =
| I_SWP _| I_SWPBH _| I_SXTW _| I_TLBI _| I_UBFM _
| I_UDF _| I_UNSEAL _ | I_ADDSUBEXT _ | I_ABS _ | I_REV _ | I_EXTR _
| I_MOPL _
| I_WHILELT _ | I_WHILELE _ | I_WHILELO _ | I_WHILELS _
| I_UADDV _
| I_LD1SP _ | I_LD2SP _ | I_LD3SP _ | I_LD4SP _
| I_ST1SP _ | I_ST2SP _ | I_ST3SP _ | I_ST4SP _
| I_MOV_SV _
| I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _
| I_DUP_SV _ | I_ADD_SV _ | I_PTRUE _
-> true

let is_cmodx_restricted_value =
Expand Down Expand Up @@ -218,6 +225,17 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) =
let vs = get_rec 0 in
V.Val (Constant.ConcreteVector vs)

(* [predicate_mask psize] is the value whose [psize] low-order bits are set
   (that is, 2^psize - 1), built from a hexadecimal literal with
   [V.stringToV].
   Only [psize] = 1, 2, 4 or 8 is accepted; any other value hits
   [assert false].
   NOTE(review): [psize] is presumably the number of predicate bits that
   govern one vector element -- confirm against the callers. *)
let predicate_mask psize =
  let mask = match psize with
  | 1 -> "0x1"
  | 2 -> "0x3"
  (* Four bits must be set here: the previous literal "0x7" only set
     three, leaving the top bit of a 4-bit predicate element unmasked. *)
  | 4 -> "0xf"
  | 8 -> "0xff"
  | _ -> assert false in
  V.stringToV mask

(* Mask for SVE scalable vector registers: a plain alias of [neon_mask]
   (defined outside this excerpt), so SVE values are masked exactly like
   Neon ones. *)
let scalable_mask = neon_mask

let simd_mem_access_size rs = match List.hd rs with
| Vreg (_,(_,8)) -> MachSize.Byte
| Vreg (_,(_,16)) -> MachSize.Short
Expand Down Expand Up @@ -252,6 +270,11 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) =
| I_LD4 (rs,_,_,_) | I_LD4R (rs,_,_) | I_ST4 (rs,_,_,_)
| I_LD4M (rs,_,_) | I_ST4M (rs,_,_) ->
Some (simd_mem_access_size rs)
| I_LD1SP (v,_,_,_,_) | I_ST1SP (v,_,_,_,_)
| I_LD2SP (v,_,_,_,_) | I_ST2SP (v,_,_,_,_)
| I_LD3SP (v,_,_,_,_) | I_ST3SP (v,_,_,_,_)
| I_LD4SP (v,_,_,_,_) | I_ST4SP (v,_,_,_,_) ->
Some (tr_simd_variant v)
| I_LDRBH (v,_,_,_) | I_LDARBH (v,_,_,_) | I_LDRS ((_,v),_,_,_)
| I_STRBH (v,_,_,_) | I_STLRBH (v,_,_) | I_STXRBH (v,_,_,_,_)
| I_CASBH (v,_,_,_,_) | I_SWPBH (v,_,_,_,_)
Expand All @@ -275,6 +298,10 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) =
| I_MOVI_S _ | I_MOVI_V _ | I_ADDV _ | I_DUP _ | I_FMOV_TG _
| I_EOR_SIMD _ | I_ADD_SIMD _ | I_ADD_SIMD_S _
| I_UDF _ | I_ADDSUBEXT _ | I_MOPL _
| I_WHILELT _ | I_WHILELE _ | I_WHILELO _ | I_WHILELS _
| I_UADDV _
| I_MOV_SV _ | I_DUP_SV _ | I_ADD_SV _ | I_PTRUE _
| I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _
-> None

let all_regs =
Expand Down Expand Up @@ -306,6 +333,7 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) =
| I_IC _|I_DC _|I_TLBI _
| I_NOP|I_TBZ _|I_TBNZ _
| I_BL _ | I_BLR _ | I_RET _ | I_ERET | I_UDF _
| I_ST1SP _ | I_ST2SP _ | I_ST3SP _ | I_ST4SP _
-> [] (* For -variant self only ? *)
| I_LDR (_,r1,r2,MemExt.Imm (_,(PreIdx|PostIdx)))
| I_LDRBH (_,r1,r2,MemExt.Imm (_,(PreIdx|PostIdx)))
Expand Down Expand Up @@ -338,11 +366,21 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) =
| I_ADDV (_,r,_)
| I_DUP (r,_,_)
| I_FMOV_TG (_,r,_,_)
| I_WHILELT (r,_,_,_) | I_WHILELE (r,_,_,_) | I_WHILELO (r,_,_,_) | I_WHILELS (r,_,_,_)
| I_UADDV (_,r,_,_)
| I_MOV_SV (r,_,_)
| I_DUP_SV (r,_,_) | I_ADD_SV (r,_,_) | I_PTRUE (r,_)
| I_INDEX_SI (r,_,_,_) | I_INDEX_IS (r,_,_,_) | I_INDEX_SS (r,_,_,_) | I_INDEX_II (r,_,_)
-> [r]
| I_MSR (sr,_)
-> [(SysReg sr)]
| I_LDXP (_,_,r1,r2,_)
-> [r1;r2;]
| I_LD1SP (_,rs,_,_,_)
| I_LD2SP (_,rs,_,_,_)
| I_LD3SP (_,rs,_,_,_)
| I_LD4SP (_,rs,_,_,_)
-> rs
| I_LDAP1 _
| I_STL1 _
| I_LD1 _|I_LD1M _|I_LD1R _|I_LD2 _
Expand Down Expand Up @@ -403,6 +441,13 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) =
| I_CSEL _|I_IC _|I_DC _|I_TLBI _|I_MRS _|I_MSR _
| I_STG _|I_STZG _|I_STZ2G _|I_LDG _|I_UDF _
| I_ADDSUBEXT _|I_MOPL _
| I_WHILELT _ | I_WHILELE _ | I_WHILELO _ | I_WHILELS _
| I_UADDV _
| I_LD1SP _ | I_LD2SP _ | I_LD3SP _ | I_LD4SP _
| I_ST1SP _ | I_ST2SP _ | I_ST3SP _ | I_ST4SP _
| I_ADD_SV _ | I_PTRUE _
| I_MOV_SV _ | I_DUP_SV _
| I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _
-> MachSize.No

include ArchExtra_herd.Make(C)
Expand Down
2 changes: 1 addition & 1 deletion herd/AArch64ParseTest.ml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ module Make(Conf:RunTest.Config)(ModelConfig:MemCat.Config) = struct
let module AArch64Value = CapabilityValue.Make(ConfMorello) in
let module X = AArch64Make(AArch64Value) in
X.X.run
else if Conf.variant Variant.Neon then
else if Conf.variant Variant.Neon || Conf.variant Variant.SVE then
let module AArch64Value = Uint128Value.Make(ConfMorello) in
let module X = AArch64Make(AArch64Value) in
X.X.run
Expand Down
Loading

0 comments on commit b1dbf49

Please sign in to comment.