Skip to content

Commit

Permalink
Merge pull request #802 from murzinv/sve-basics
Browse files Browse the repository at this point in the history
[all] Introduce basic support for SVE

This PR introduces basic support for SVE instructions in `litmus7`, `diy7` and `herd7`.

Scalable Vector Extension (SVE) is Vector Length Agnostic (VLA):
- Vector Length (VL) is a hardware implementation choice from 128 up to 2048 bits.
- The new programming model allows software to scale dynamically to the available vector length.
- No need to define a new ISA, rewrite or recompile for new vector lengths.

Scalable vector registers:
- Z0-Z31 extending NEON’s 128-bit V0-V31
- Packed DP, SP & HP floating-point elements
- Packed 64, 32, 16 & 8-bit integer elements
    
 Scalable predicate registers:
   
 - P0-P7 governing predicates for load/store/arithmetic
 - P8-P15 additional predicates for loop management
 - FFR first fault register for software speculation
 
 I personally find the tutorial at [1] a really great introduction to the SVE programming model.
 
 Implementation choices and known limitations:
     - The SVE memory model is not addressed
     - `herd7` implements a 128-bit vector length (on top of the existing Neon infrastructure)
 
[1] https://www.stonybrook.edu/commcms/ookami/support/_docs/ARM_SVE_tutorial.pdf
  • Loading branch information
maranget authored May 2, 2024
2 parents 0859a42 + a97200c commit 4fbccd1
Show file tree
Hide file tree
Showing 70 changed files with 2,987 additions and 51 deletions.
12 changes: 12 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,18 @@ test.neon::
$(REGRESSION_TEST_MODE)
@ echo "herd7 AArch64 NEON instructions tests: OK"

# test.sve: run the herd7 regression harness over the litmus tests in
# ./herd/tests/instructions/AArch64.sve with the `sve` variant enabled.
# The target is also registered as a prerequisite of the aggregate `test`
# target, so a plain `make test` runs it too.
test:: test.sve
test.sve::
@ echo
$(HERD_REGRESSION_TEST) \
-j $(J) \
-herd-path $(HERD) \
-libdir-path ./herd/libdir \
-litmus-dir ./herd/tests/instructions/AArch64.sve \
-variant sve \
$(REGRESSION_TEST_MODE)
@ echo "herd7 AArch64 SVE instructions tests: OK"

test:: test.mte
test.mte::
@ echo
Expand Down
46 changes: 33 additions & 13 deletions gen/AArch64Arch_gen.ml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ let do_morello = C.variant Variant_gen.Morello
(* Feature switches, each computed once from the configuration predicate
   [C.variant]. *)
let do_fullkvm = C.variant Variant_gen.FullKVM
(* KVM support is on when either the full or the plain KVM variant is set. *)
let do_kvm = do_fullkvm || C.variant Variant_gen.KVM
let do_neon = C.variant Variant_gen.Neon
(* True when the SVE variant is selected; gates the SVE atoms in [fold_sve]. *)
let do_sve = C.variant Variant_gen.SVE
let do_mixed = Variant_gen.is_mixed C.variant
let do_cu = C.variant Variant_gen.ConstrainedUnpredictable

Expand All @@ -60,7 +61,7 @@ module Mixed =
let bellatom = false
module SIMD = struct

type atom = NeP|NeAcqPc|NeRel|Ne1|Ne2|Ne3|Ne4|Ne2i|Ne3i|Ne4i|NePa|NePaN
type atom = SvV|Sv1|Sv2i|Sv3i|Sv4i|NeP|NeAcqPc|NeRel|Ne1|Ne2|Ne3|Ne4|Ne2i|Ne3i|Ne4i|NePa|NePaN

let fold_neon f r = r |>
f NeAcqPc |> f NeRel |>
Expand All @@ -69,26 +70,35 @@ module SIMD = struct
f Ne1 |> f Ne2 |> f Ne3 |> f Ne4 |>
f Ne2i |> f Ne3i |> f Ne4i

(* [fold_sve f r] threads the accumulator [r] through [f] applied to each
   SVE atom, in the order SvV, Sv1, Sv2i, Sv3i, Sv4i. *)
let fold_sve f r = r |>
f SvV |> f Sv1 |>
f Sv2i |> f Sv3i |> f Sv4i

let nregs = function
| Ne1 -> 1
| Ne2 | Ne2i -> 2
| Ne3 | Ne3i -> 3
| Ne4 | Ne4i -> 4
| SvV | Sv1 | Ne1 -> 1
| Sv2i | Ne2 | Ne2i -> 2
| Sv3i | Ne3 | Ne3i -> 3
| Sv4i | Ne4 | Ne4i -> 4
| _ -> 1

(* [nelements a] is the element count associated with atom [a]:
   4 for every Sv* atom and for Ne1..Ne4 / Ne2i..Ne4i, 2 for NePa and NePaN,
   and 1 for NeP, NeAcqPc and NeRel.
   It is used together with [nregs] when computing element indices. *)
let nelements = function
| SvV|Sv1|Sv2i|Sv3i|Sv4i
| Ne1|Ne2|Ne2i|Ne3|Ne3i|Ne4|Ne4i -> 4
| NePa|NePaN -> 2
| NeP | NeAcqPc | NeRel -> 1

(* [pp_opt a] is the suffix appended to the printed name of atom [a]:
   "i" for Sv2i/Sv3i/Sv4i and Ne2i/Ne3i/Ne4i, and the empty string for every
   other atom. *)
let pp_opt = function
| Sv2i | Sv3i | Sv4i
| Ne2i | Ne3i | Ne4i -> "i"
| _ -> ""

let pp n =
match n with
| Ne1 | Ne2 | Ne3 | Ne4 | Ne2i | Ne3i | Ne4i ->
Printf.sprintf "Ne%i%s" (nregs n) (pp_opt n)
| Sv1 | Sv2i | Sv3i | Sv4i ->
Printf.sprintf "Sv%i%s" (nregs n) (pp_opt n)
| SvV -> "SvV"
| NePa -> "NePa"
| NePaN -> "NePaN"
| NeP -> "NeP"
Expand All @@ -107,9 +117,10 @@ module SIMD = struct
for k = 0 to sz-1 do
for i=0 to el-1 do
let j = match n with
| Ne2i | Ne3i | Ne4i -> k+i*sz
| Sv2i | Sv3i | Sv4i | Ne2i | Ne3i | Ne4i -> k+i*sz
| NeP | NeAcqPc | NeRel | NePa | NePaN
| Ne1 | Ne2 | Ne3 | Ne4 -> i+k*el
| Ne1 | Ne2 | Ne3 | Ne4
| SvV | Sv1 -> i+k*el
in
v.(j) <- start+k
done
Expand All @@ -120,9 +131,10 @@ module SIMD = struct
let el = nelements n in
let sz = nregs n in
let access r k = match n with
| Ne2i | Ne3i | Ne4i -> sz*k + r
| Sv2i | Sv3i | Sv4i | Ne2i | Ne3i | Ne4i -> sz*k + r
| NeP | NeAcqPc | NeRel | NePa | NePaN
| Ne1 | Ne2 | Ne3 | Ne4 -> el*r + k
| Ne1 | Ne2 | Ne3 | Ne4
| SvV | Sv1 -> el*r + k
in
let rec reg r k =
if k >= el then []
Expand Down Expand Up @@ -335,6 +347,12 @@ let is_ifetch a = match a with
else
fun _ r -> r

(* Fold over the SVE atoms when the SVE variant is enabled.
   With [do_sve] set, each atom enumerated by [SIMD.fold_sve] is wrapped in
   the [Neon] constructor before being passed to [f]; otherwise the fold is
   a no-op that returns the accumulator unchanged. *)
let fold_sve =
if do_sve then
fun f -> SIMD.fold_sve (fun n -> f (Neon n))
else
fun _ r -> r

let fold_pair f r =
if do_mixed then r
else
Expand All @@ -357,6 +375,7 @@ let is_ifetch a = match a with
let r = fold_morello f r in
let r = fold_tag f r in
let r = fold_neon f r in
let r = fold_sve f r in
let r = fold_pair f r in
let r = fold_acc_opt None f r in
let r =
Expand Down Expand Up @@ -456,10 +475,10 @@ let is_ifetch a = match a with
function
| NeP | NeAcqPc | NeRel -> 1
| NePa | NePaN -> 2
| Ne1 -> 4
| Ne2 | Ne2i -> 8
| Ne3 | Ne3i -> 12
| Ne4 | Ne4i -> 16
| SvV | Sv1 | Ne1 -> 4
| Sv2i | Ne2 | Ne2i -> 8
| Sv3i | Ne3 | Ne3i -> 12
| Sv4i | Ne4 | Ne4i -> 16

let atom_to_bank = function
| Tag,None -> Code.Tag
Expand Down Expand Up @@ -866,6 +885,7 @@ include

type special = reg
let specials = vregs
let specials2 = pregs
end)

end
Loading

0 comments on commit 4fbccd1

Please sign in to comment.