From b1dbf49dc445ef41d959a32f7f0a4f29cc05724b Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Thu, 28 Sep 2023 15:14:41 +0100 Subject: [PATCH] [all]: Introduce basic support for SVE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Scalable Vector Extension (SVE) is Vector Length Agnostic (VLA): - Vector Length (VL) is a hardware implementation choice from 128 up to 2048 bits. - New programming model allows software to scale dynamically to available vector length. - No need to define a new ISA, rewrite or recompile for new vector lengths. Scalable vector registers: - Z0-Z31 extending NEON’s 128-bit V0-V31 - Packed DP, SP & HP floating-point elements - Packed 64, 32, 16 & 8-bit integer elements Scalable predicate registers: - P0-P7 governing predicates for load/store/arithmetic - P8-P15 additional predicates for loop management - FFR first fault register for software speculation Implementation choices and known limitations: - SVE memory model is not addressed - `herd7` implements 128 bits vector length (on top of existing Neon infrastructure) - `litmus7` uses ARM C Language Extensions (ACLE) for SVE + Building SVE test require `-ccopts "-march=armv8-a+sve -O2"` + Although `Z` registers overlap with `V` registers mixing them in litmus test would likely lead to compilation failure (due to difference in the ACLE types) + However, `V` register overlapped with `Z` register in `final` clause is supported (this way we can inspect content of `Z` register) - Following SVE instructions are implemented: + PTRUE (predicate) + MOV (immediate, unpredicated) + DUP (scalar) + ADD (vectors, unpredicated) + INDEX (immediate, scalar) + INDEX (immediates) + INDEX (scalar, immediate) + INDEX (scalars) + WHILELE (predicate) + WHILELT (predicate) + WHILELE (predicate) + WHILELO (predicate) + WHILELS (predicate) + LD1B (scalar plus immediate, single register) + LD1H (scalar plus immediate, single register) + LD1W (scalar plus immediate, single register) + LD1D (scalar plus immediate, single register) + LD1D (scalar plus scalar, single register) + LD1B (scalar plus scalar, single register) + LD1H (scalar plus scalar, single register) + LD1W (scalar plus scalar, single register) + LD1B (scalar plus vector) + LD1H (scalar plus vector) + LD1W (scalar plus vector) + LD1D (scalar plus vector) + LD2B (scalar plus immediate) + LD2H (scalar plus immediate) + LD2W (scalar plus immediate) + LD2D (scalar plus immediate) + LD2B (scalar plus scalar) + LD2H (scalar plus scalar) + LD2W (scalar plus scalar) + LD2D (scalar plus scalar) + LD3B (scalar plus immediate) + LD3H (scalar plus immediate) + LD3W (scalar plus immediate) + LD3D (scalar plus immediate) + LD3B (scalar plus scalar) + LD3H (scalar plus scalar) + LD3W (scalar plus scalar) + LD3D (scalar plus scalar) + LD4B (scalar plus immediate) + LD4H (scalar plus immediate) + LD4W (scalar plus immediate) + LD4D (scalar plus immediate) + LD4B (scalar plus scalar) + LD4H (scalar plus scalar) + LD4W (scalar plus scalar) + LD4D (scalar plus scalar) + ST1B (scalar plus immediate, single register) + ST1H (scalar plus immediate, single register) + ST1W (scalar plus immediate, single register) + ST1D (scalar plus immediate, single register) + ST1B (scalar plus scalar, single register) + ST1H (scalar plus scalar, single register) + ST1W (scalar plus scalar, single register) + ST1D (scalar plus scalar, single register) + ST1B (scalar plus vector) + ST1H (scalar plus vector) + ST1W (scalar plus vector) + ST1D (scalar plus vector) + ST2B (scalar plus immediate) + ST2H (scalar plus immediate) + ST2W (scalar plus immediate) + ST2D (scalar plus immediate) + ST2B (scalar plus scalar) + ST2H (scalar plus scalar) + ST2W (scalar plus scalar) + ST2D (scalar plus scalar) + ST3B (scalar plus immediate) + ST3H (scalar plus immediate) + ST3W (scalar plus immediate) + ST3D (scalar plus immediate) + ST3B (scalar plus scalar) + ST3H (scalar plus scalar) + ST3W (scalar plus scalar) + ST3D (scalar plus scalar) + ST4B (scalar plus immediate) + ST4H (scalar plus immediate) + ST4W (scalar plus immediate) + ST4D (scalar plus immediate) + ST4B (scalar plus scalar) + ST4H (scalar plus scalar) + ST4W (scalar plus scalar) + ST4D (scalar plus scalar) + SVE aliases for condition codes Signed-off-by: Vladimir Murzin --- Makefile | 12 + herd/AArch64Arch_herd.ml | 45 ++ herd/AArch64ParseTest.ml | 2 +- herd/AArch64Sem.ml | 530 +++++++++++++++++- herd/libdir/aarch64util.cat | 4 + .../tests/instructions/AArch64.sve/V01.litmus | 13 + .../AArch64.sve/V01.litmus.expected | 11 + .../tests/instructions/AArch64.sve/V02.litmus | 10 + .../AArch64.sve/V02.litmus.expected | 11 + .../tests/instructions/AArch64.sve/V03.litmus | 13 + .../AArch64.sve/V03.litmus.expected | 11 + .../tests/instructions/AArch64.sve/V04.litmus | 13 + .../AArch64.sve/V04.litmus.expected | 11 + .../tests/instructions/AArch64.sve/V05.litmus | 13 + .../AArch64.sve/V05.litmus.expected | 11 + .../tests/instructions/AArch64.sve/V06.litmus | 13 + .../AArch64.sve/V06.litmus.expected | 11 + .../tests/instructions/AArch64.sve/V07.litmus | 12 + .../AArch64.sve/V07.litmus.expected | 11 + .../tests/instructions/AArch64.sve/V08.litmus | 9 + .../AArch64.sve/V08.litmus.expected | 11 + .../tests/instructions/AArch64.sve/V09.litmus | 14 + .../AArch64.sve/V09.litmus.expected | 11 + .../tests/instructions/AArch64.sve/V10.litmus | 15 + .../AArch64.sve/V10.litmus.expected | 11 + .../tests/instructions/AArch64.sve/V11.litmus | 16 + .../AArch64.sve/V11.litmus.expected | 11 + .../tests/instructions/AArch64.sve/V12.litmus | 17 + .../AArch64.sve/V12.litmus.expected | 11 + .../tests/instructions/AArch64.sve/V13.litmus | 10 + .../AArch64.sve/V13.litmus.expected | 11 + .../tests/instructions/AArch64.sve/V14.litmus | 12 + .../AArch64.sve/V14.litmus.expected | 11 + .../tests/instructions/AArch64.sve/V15.litmus | 35 ++ .../AArch64.sve/V15.litmus.expected | 11 + .../tests/instructions/AArch64.sve/V16.litmus | 15 + .../AArch64.sve/V16.litmus.expected | 11 + .../tests/instructions/AArch64.sve/V17.litmus | 18 + .../AArch64.sve/V17.litmus.expected | 11 + .../tests/instructions/AArch64.sve/V18.litmus | 11 + .../AArch64.sve/V18.litmus.expected | 11 + herd/variant.ml | 3 + herd/variant.mli | 1 + jingle/AArch64Arch_jingle.ml | 9 + lib/AArch64Base.ml | 512 ++++++++++++++++- lib/AArch64Lexer.mll | 100 +++- lib/AArch64Parser.mly | 488 +++++++++++++++- lib/CType.ml | 2 + lib/CType.mli | 2 + litmus/AArch64Arch_litmus.ml | 17 +- litmus/AArch64Compile_litmus.ml | 356 ++++++++++++ litmus/ASMLang.ml | 2 +- litmus/libdir/_aarch64/intrinsics.h | 10 + litmus/libdir/armv8+sve-qemu.cfg | 13 + litmus/template.ml | 2 +- 55 files changed, 2532 insertions(+), 35 deletions(-) create mode 100644 herd/tests/instructions/AArch64.sve/V01.litmus create mode 100644 herd/tests/instructions/AArch64.sve/V01.litmus.expected create mode 100644 herd/tests/instructions/AArch64.sve/V02.litmus create mode 100644 herd/tests/instructions/AArch64.sve/V02.litmus.expected create mode 100644 herd/tests/instructions/AArch64.sve/V03.litmus create mode 100644 herd/tests/instructions/AArch64.sve/V03.litmus.expected create mode 100644 herd/tests/instructions/AArch64.sve/V04.litmus create mode 100644 herd/tests/instructions/AArch64.sve/V04.litmus.expected create mode 100644 herd/tests/instructions/AArch64.sve/V05.litmus create mode 100644 herd/tests/instructions/AArch64.sve/V05.litmus.expected create mode 100644 herd/tests/instructions/AArch64.sve/V06.litmus create mode 100644 herd/tests/instructions/AArch64.sve/V06.litmus.expected create mode 100644 herd/tests/instructions/AArch64.sve/V07.litmus create mode 100644 herd/tests/instructions/AArch64.sve/V07.litmus.expected create mode 100644 herd/tests/instructions/AArch64.sve/V08.litmus create mode 100644 herd/tests/instructions/AArch64.sve/V08.litmus.expected create mode 100644 herd/tests/instructions/AArch64.sve/V09.litmus create mode 100644 herd/tests/instructions/AArch64.sve/V09.litmus.expected create mode 100644 herd/tests/instructions/AArch64.sve/V10.litmus create mode 100644 herd/tests/instructions/AArch64.sve/V10.litmus.expected create mode 100644 herd/tests/instructions/AArch64.sve/V11.litmus create mode 100644 herd/tests/instructions/AArch64.sve/V11.litmus.expected create mode 100644 herd/tests/instructions/AArch64.sve/V12.litmus create mode 100644 herd/tests/instructions/AArch64.sve/V12.litmus.expected create mode 100644 herd/tests/instructions/AArch64.sve/V13.litmus create mode 100644 herd/tests/instructions/AArch64.sve/V13.litmus.expected create mode 100644 herd/tests/instructions/AArch64.sve/V14.litmus create mode 100644 herd/tests/instructions/AArch64.sve/V14.litmus.expected create mode 100644 herd/tests/instructions/AArch64.sve/V15.litmus create mode 100644 herd/tests/instructions/AArch64.sve/V15.litmus.expected create mode 100644 herd/tests/instructions/AArch64.sve/V16.litmus create mode 100644 herd/tests/instructions/AArch64.sve/V16.litmus.expected create mode 100644 herd/tests/instructions/AArch64.sve/V17.litmus create mode 100644 herd/tests/instructions/AArch64.sve/V17.litmus.expected create mode 100644 herd/tests/instructions/AArch64.sve/V18.litmus create mode 100644 herd/tests/instructions/AArch64.sve/V18.litmus.expected create mode 100644 litmus/libdir/armv8+sve-qemu.cfg diff --git a/Makefile b/Makefile index f90ff7d997..39ad849130 100644 --- a/Makefile +++ b/Makefile @@ -138,6 +138,18 @@ test.neon:: $(REGRESSION_TEST_MODE) @ echo "herd7 AArch64 NEON instructions tests: OK" +test:: test.sve +test.sve:: + @ echo + $(HERD_REGRESSION_TEST) \ + -j $(J) \ + -herd-path $(HERD) \ + -libdir-path ./herd/libdir \ + -litmus-dir ./herd/tests/instructions/AArch64.sve \ + -variant sve \ + $(REGRESSION_TEST_MODE) + @ echo "herd7 AArch64 SVE instructions tests: OK" + test:: test.mte test.mte:: @ echo diff --git a/herd/AArch64Arch_herd.ml b/herd/AArch64Arch_herd.ml index d49bbf743d..0acfe12822 100644 --- a/herd/AArch64Arch_herd.ml +++ b/herd/AArch64Arch_herd.ml @@ -109,6 +109,13 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) = | I_SWP _| I_SWPBH _| I_SXTW _| I_TLBI _| I_UBFM _ | I_UDF _| I_UNSEAL _ | I_ADDSUBEXT _ | I_ABS _ | I_REV _ | I_EXTR _ | I_MOPL _ + | I_WHILELT _ | I_WHILELE _ | I_WHILELO _ | I_WHILELS _ + | I_UADDV _ + | I_LD1SP _ | I_LD2SP _ | I_LD3SP _ | I_LD4SP _ + | I_ST1SP _ | I_ST2SP _ | I_ST3SP _ | I_ST4SP _ + | I_MOV_SV _ + | I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _ + | I_DUP_SV _ | I_ADD_SV _ | I_PTRUE _ -> true let is_cmodx_restricted_value = @@ -218,6 +225,17 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) = let vs = get_rec 0 in V.Val (Constant.ConcreteVector vs) + let predicate_mask psize = + let mask = match psize with + | 1 -> "0x1" + | 2 -> "0x3" + | 4 -> "0x7" + | 8 -> "0xff" + | _ -> assert false in + V.stringToV mask + + let scalable_mask = neon_mask + let simd_mem_access_size rs = match List.hd rs with | Vreg (_,(_,8)) -> MachSize.Byte | Vreg (_,(_,16)) -> MachSize.Short @@ -252,6 +270,11 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) = | I_LD4 (rs,_,_,_) | I_LD4R (rs,_,_) | I_ST4 (rs,_,_,_) | I_LD4M (rs,_,_) | I_ST4M (rs,_,_) -> Some (simd_mem_access_size rs) + | I_LD1SP (v,_,_,_,_) | I_ST1SP (v,_,_,_,_) + | I_LD2SP (v,_,_,_,_) | I_ST2SP (v,_,_,_,_) + | I_LD3SP (v,_,_,_,_) | I_ST3SP (v,_,_,_,_) + | I_LD4SP (v,_,_,_,_) | I_ST4SP (v,_,_,_,_) -> + Some (tr_simd_variant v) | I_LDRBH (v,_,_,_) | I_LDARBH (v,_,_,_) | I_LDRS ((_,v),_,_,_) | I_STRBH (v,_,_,_) | I_STLRBH (v,_,_) | I_STXRBH (v,_,_,_,_) | I_CASBH (v,_,_,_,_) | I_SWPBH (v,_,_,_,_) @@ -275,6 +298,10 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) = | I_MOVI_S _ | I_MOVI_V _ | I_ADDV _ | I_DUP _ | I_FMOV_TG _ | I_EOR_SIMD _ | I_ADD_SIMD _ | I_ADD_SIMD_S _ | I_UDF _ | I_ADDSUBEXT _ | I_MOPL _ + | I_WHILELT _ | I_WHILELE _ | I_WHILELO _ | I_WHILELS _ + | I_UADDV _ + | I_MOV_SV _ | I_DUP_SV _ | I_ADD_SV _ | I_PTRUE _ + | I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _ -> None let all_regs = @@ -306,6 +333,7 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) = | I_IC _|I_DC _|I_TLBI _ | I_NOP|I_TBZ _|I_TBNZ _ | I_BL _ | I_BLR _ | I_RET _ | I_ERET | I_UDF _ + | I_ST1SP _ | I_ST2SP _ | I_ST3SP _ | I_ST4SP _ -> [] (* For -variant self only ? *) | I_LDR (_,r1,r2,MemExt.Imm (_,(PreIdx|PostIdx))) | I_LDRBH (_,r1,r2,MemExt.Imm (_,(PreIdx|PostIdx))) @@ -338,11 +366,21 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) = | I_ADDV (_,r,_) | I_DUP (r,_,_) | I_FMOV_TG (_,r,_,_) + | I_WHILELT (r,_,_,_) | I_WHILELE (r,_,_,_) | I_WHILELO (r,_,_,_) | I_WHILELS (r,_,_,_) + | I_UADDV (_,r,_,_) + | I_MOV_SV (r,_,_) + | I_DUP_SV (r,_,_) | I_ADD_SV (r,_,_) | I_PTRUE (r,_) + | I_INDEX_SI (r,_,_,_) | I_INDEX_IS (r,_,_,_) | I_INDEX_SS (r,_,_,_) | I_INDEX_II (r,_,_) -> [r] | I_MSR (sr,_) -> [(SysReg sr)] | I_LDXP (_,_,r1,r2,_) -> [r1;r2;] + | I_LD1SP (_,rs,_,_,_) + | I_LD2SP (_,rs,_,_,_) + | I_LD3SP (_,rs,_,_,_) + | I_LD4SP (_,rs,_,_,_) + -> rs | I_LDAP1 _ | I_STL1 _ | I_LD1 _|I_LD1M _|I_LD1R _|I_LD2 _ @@ -403,6 +441,13 @@ module Make (C:Arch_herd.Config)(V:Value.AArch64) = | I_CSEL _|I_IC _|I_DC _|I_TLBI _|I_MRS _|I_MSR _ | I_STG _|I_STZG _|I_STZ2G _|I_LDG _|I_UDF _ | I_ADDSUBEXT _|I_MOPL _ + | I_WHILELT _ | I_WHILELE _ | I_WHILELO _ | I_WHILELS _ + | I_UADDV _ + | I_LD1SP _ | I_LD2SP _ | I_LD3SP _ | I_LD4SP _ + | I_ST1SP _ | I_ST2SP _ | I_ST3SP _ | I_ST4SP _ + | I_ADD_SV _ | I_PTRUE _ + | I_MOV_SV _ | I_DUP_SV _ + | I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _ -> MachSize.No include ArchExtra_herd.Make(C) diff --git a/herd/AArch64ParseTest.ml b/herd/AArch64ParseTest.ml index 2b3bfdb71d..2b54c61e34 100644 --- a/herd/AArch64ParseTest.ml +++ b/herd/AArch64ParseTest.ml @@ -72,7 +72,7 @@ module Make(Conf:RunTest.Config)(ModelConfig:MemCat.Config) = struct let module AArch64Value = CapabilityValue.Make(ConfMorello) in let module X = AArch64Make(AArch64Value) in X.X.run - else if Conf.variant Variant.Neon then + else if Conf.variant Variant.Neon || Conf.variant Variant.SVE then let module AArch64Value = Uint128Value.Make(ConfMorello) in let module X = AArch64Make(AArch64Value) in X.X.run diff --git a/herd/AArch64Sem.ml b/herd/AArch64Sem.ml index b76474c34a..206c025414 100644 --- a/herd/AArch64Sem.ml +++ b/herd/AArch64Sem.ml @@ -34,6 +34,7 @@ module Make let memtag = C.variant Variant.MemTag let morello = C.variant Variant.Morello let neon = C.variant Variant.Neon + let sve = C.variant Variant.SVE let kvm = C.variant Variant.VMSA let is_branching = kvm && not (C.variant Variant.NoPteBranch) let pte2 = kvm && C.variant Variant.PTE2 @@ -54,6 +55,18 @@ module Make "morello instruction %s require -variant morello" (AArch64.dump_instruction inst) + let check_neon inst = + if not (neon || sve) then + Warn.user_error + "Neon instruction %s requires -variant neon" + (AArch64.dump_instruction inst) + + let check_sve inst = + if not sve then + Warn.user_error + "SVE instruction %s requires -variant sve" + (AArch64.dump_instruction inst) + (* Barrier pretty print *) let barriers = let bs = AArch64Base.do_fold_dmb_dsb false true (fun h t -> h::t) [] @@ -154,7 +167,6 @@ module Make (A.Location_reg (ii.A.proc,r)) ii let read_reg_neon is_data r ii = - if not neon then Warn.user_error "Advanced SIMD instructions require -variant neon" ; let vr = match r with | AArch64Base.SIMDreg _ -> r | AArch64Base.Vreg(vr',_) -> (AArch64Base.SIMDreg vr') @@ -226,7 +238,6 @@ module Make neon_replicate old_val (nelem-1) esize v let write_reg_neon_sz sz r v ii = - if not neon then Warn.user_error "Advanced SIMD instructions require -variant neon" ; let vr = match r with | AArch64Base.SIMDreg _ -> r | AArch64Base.Vreg(vr',_) -> (AArch64Base.SIMDreg vr') @@ -250,6 +261,137 @@ module Make neon_replicate v nelem esize v >>= fun new_val -> write_reg_neon_sz sz r new_val ii | _ -> assert false + let predicate_psize r = match r with + | AArch64Base.Preg (_,esize) + (* Infer predicate bitsize from Z register *) + | AArch64Base.Zreg (_,esize) -> esize / 8 + | _ -> assert false + + let predicate_nelem r = match r with + | AArch64Base.Preg (_,esize) + (* Infer predicate elements from Z register *) + | AArch64Base.Zreg (_,esize) -> 128 / esize + | _ -> assert false + + let predicate_count predicate nelem = + let open AArch64 in + match predicate with + | VL1 when nelem >= 1 -> 1 + | VL2 when nelem >= 2 -> 2 + | VL3 when nelem >= 3 -> 3 + | VL4 when nelem >= 4 -> 4 + | VL5 when nelem >= 5 -> 5 + | VL6 when nelem >= 6 -> 6 + | VL7 when nelem >= 7 -> 7 + | VL8 when nelem >= 8 -> 8 + | VL16 when nelem >= 16 -> 16 + | VL32 when nelem >= 32 -> 32 + | VL128 when nelem >= 128 -> 128 + | VL256 when nelem >= 256 -> 256 + | MUL3 -> nelem - (nelem mod 3) + | MUL4 -> nelem - (nelem mod 4) + | ALL -> nelem + | POW2 -> + let rec calc n = + if nelem >= (Int.shift_left 1 n) then calc (n+1) + else Int.shift_left 1 (n-1) in + calc 1 + | _ -> 0 + + let read_reg_predicate is_data r ii = + let vr = match r with + | AArch64Base.Preg(_,_) | AArch64Base.PMreg(_,_) -> r + | _ -> assert false in + let location = A.Location_reg (ii.A.proc,vr) in + M.read_loc is_data (mk_read MachSize.Short Annot.N aexp) location ii + + let predicate_setlane old_val idx psize v = + let mask = V.op1 (Op.LeftShift (idx*psize)) (AArch64.predicate_mask psize) in + let invert = V.op1 Op.Inv mask in + M.op1 (Op.LeftShift (idx*psize)) v >>= fun new_val -> + M.op Op.And invert old_val >>| + M.op Op.And mask new_val >>= fun (v1,v2) -> + M.op Op.Or v1 v2 + + let predicate_getlane cur_val idx psize = + let mask = V.op1 (Op.LeftShift (idx*psize)) (AArch64.predicate_mask psize) in + M.op Op.And mask cur_val >>= fun masked_val -> + M.op1 (Op.LogicalRightShift (idx*psize)) masked_val + + let write_reg_predicate_sz sz r v ii = + let pr = match r with + | AArch64Base.Preg(_,_) -> r + | _ -> assert false in + (* Clear unused register bits (zero extend) *) + M.op1 (Op.Mask sz) v >>= fun v -> + let location = A.Location_reg (ii.A.proc,pr) in + M.write_loc (mk_write MachSize.Short Annot.N aexp Access.REG v) location ii + + let write_reg_predicate = write_reg_predicate_sz MachSize.Short + + let get_predicate_last pred psize idx = + predicate_getlane pred idx psize >>= fun v -> + M.op Op.And v V.one >>= fun last -> + M.op Op.Ne last V.zero + + let get_predicate_any pred psize nelem = + let mask idx = V.op1 (Op.LeftShift (idx*psize)) (AArch64.predicate_mask psize) in + let ops = List.map mask (Misc.interval 0 nelem) in + let allmask = List.fold_right (V.op Op.Or) ops V.zero in + M.op Op.And pred allmask >>= fun all -> + M.op Op.Ne all V.zero + + let scalable_esize r = match r with + | AArch64Base.Zreg (_,esize) -> esize + | _ -> assert false + + let scalable_nelem r = match r with + | AArch64Base.Zreg (_,esize) -> 128 / esize + | _ -> assert false + + let read_reg_scalable is_data r ii = + let vr = match r with + | AArch64Base.Zreg _ -> r + | _ -> assert false in + let location = A.Location_reg (ii.A.proc,vr) in + M.read_loc is_data (mk_read MachSize.S128 Annot.N aexp) location ii + + let scalable_setlane old_val idx esize v = + let mask = V.op1 (Op.LeftShift (idx*esize)) (AArch64.scalable_mask esize) in + let invert = V.op1 Op.Inv mask in + M.op1 (Op.LeftShift (idx*esize)) v >>= fun new_val -> + M.op Op.And invert old_val >>| + M.op Op.And mask new_val >>= fun (v1,v2) -> + M.op Op.Or v1 v2 + + let scalable_getlane cur_val idx esize = + let mask = V.op1 (Op.LeftShift (idx*esize)) (AArch64.scalable_mask esize) in + M.op Op.And mask cur_val >>= fun masked_val -> + M.op1 (Op.LogicalRightShift (idx*esize)) masked_val + + let rec scalable_replicate old_val nelem esize v = match nelem with + | 0 -> M.unitT old_val + | _ -> + scalable_setlane old_val (nelem-1) esize v >>= fun old_val -> + scalable_replicate old_val (nelem-1) esize v + + let write_reg_scalable_sz sz r v ii = + let pr = match r with + | AArch64Base.Zreg(_,_) -> r + | _ -> assert false in + (* Clear unused register bits (zero extend) *) + M.op1 (Op.Mask sz) v >>= fun v -> + let location = A.Location_reg (ii.A.proc,pr) in + M.write_loc (mk_write MachSize.S128 Annot.N aexp Access.REG v) location ii + + let write_reg_scalable = write_reg_scalable_sz MachSize.S128 + + let write_reg_scalable_rep r v ii = + let nelem = scalable_nelem r in + let esize = scalable_esize r in + scalable_replicate v nelem esize v >>= fun new_val -> + write_reg_scalable r new_val ii + let do_write_reg_sz mop sz r v ii = match r with | AArch64.ZR -> M.unitT () | _ -> match sz with @@ -1381,6 +1523,7 @@ module Make read_mem_postindexed a_virt op sz Annot.N aexp ac rd rs k a ii) ma ii) + | _ -> assert false let ldr sz = ldr0 (uxt_op sz) sz and ldrsw rd rs e ii = @@ -1543,6 +1686,7 @@ module Make str_simple sz rs rd (get_ea_preindexed rd k ii) ii | Reg (v,ri,sext,s) -> str_simple sz rs rd (get_ea_reg rd v ri sext s ii) ii + | _ -> assert false let stp_wback = @@ -2033,6 +2177,123 @@ module Make reduce (nelem-1) (M.add (V.intToV 0) (V.intToV 0)) >>= fun v -> write_reg_neon_sz sz r1 v ii + let uaddv var r1 p src ii = + let nelem = predicate_nelem src in + let psize = predicate_psize src in + let esize = scalable_esize src in + read_reg_predicate false p ii >>= fun pred -> + get_predicate_any pred psize nelem >>= fun any -> + M.choiceT + any + (read_reg_scalable false src ii >>= fun src -> + let read_active_elem_or_zero idx cur_val = + get_predicate_last pred psize idx >>= fun last -> + M.choiceT + last + (scalable_getlane cur_val idx esize) + mzero + in + let rec reduce cur_val n op = + match n with + | 0 -> op >>| read_active_elem_or_zero n cur_val >>= fun (v1,v2) -> M.add v1 v2 + | _ -> reduce cur_val (n-1) (op >>| read_active_elem_or_zero n cur_val >>= fun (v1,v2) -> M.add v1 v2) + in + reduce src (nelem-1) mzero) + mzero >>= fun v -> + let sz = AArch64Base.tr_simd_variant var in + write_reg_neon_sz sz r1 v ii + + let while_op compare unsigned p var r1 r2 ii = + let psize = predicate_psize p in + let nelem = predicate_nelem p in + let sz = AArch64Base.tr_variant var in + let extend = if unsigned then uxt_op sz else sxt_op sz in + read_reg_ord_sz sz r1 ii >>| + read_reg_ord_sz sz r2 ii >>= fun (v1,v2) -> + let rec repeat old_val idx v1 v2 last = + if idx < nelem then + extend v1 >>| + extend v2 >>= fun (v1,v2) -> + compare v1 v2 >>= fun (cond) -> + M.op Op.And last cond >>= fun last -> + predicate_setlane old_val idx psize last >>| + M.add v1 V.one >>= fun (old_val, v1) -> + repeat old_val (idx+1) v1 v2 last + else M.unitT old_val + in + repeat V.zero 0 v1 v2 V.one >>= fun (new_val) -> + write_reg_predicate p new_val ii >>| + ( let last idx = get_predicate_last new_val psize idx in + (* Fisrt active *) + let n = last 0 >>= fun v -> + M.op1 (Op.LeftShift 3) v in + (* Non active *) + let z = + let rec reduce idx op = match idx with + | 0 -> op >>| last idx >>= fun (v1,v2) -> M.op Op.Or v1 v2 + | _ -> reduce (idx-1) (op >>| last idx >>= fun (v1,v2) -> M.op Op.Or v1 v2) + in + reduce (nelem-1) mzero >>= fun v -> + M.op1 Op.Not v >>= fun v -> + M.op1 (Op.LeftShift 2) v in + (* Not last active*) + let c = last (nelem-1) >>= fun v -> + M.op1 Op.Not v >>= fun v -> + M.op1 (Op.LeftShift 1) v in + (* v always 0 *) + let flags = n >>| z >>= fun (v1,v2) -> + M.op Op.Or v1 v2 >>| c >>= fun (v1,v2) -> + M.op Op.Or v1 v2 in + flags >>= fun flags -> write_reg AArch64Base.NZCV flags ii + ) + + let ptrue p pattern ii = + let psize = predicate_psize p in + let nelem = predicate_nelem p in + let count = predicate_count pattern nelem in + let rec repeat old_val idx = + if idx < nelem then + let v = if idx < count then V.one else V.zero in + predicate_setlane old_val idx psize v >>= fun old_val -> + repeat old_val (idx+1) + else M.unitT old_val + in + repeat V.zero 0 >>= fun (new_val) -> + write_reg_predicate p new_val ii + + let mov_sv r k shift ii = + let open AArch64Base in + begin match shift with + | S_NOEXT + | S_LSL(0) -> + M.unitT (V.intToV k) + | S_LSL(8 as amount) -> + M.op1 (Op.LeftShift amount) (V.intToV k) + | S_LSL(n) -> + Warn.fatal + "illegal shift immediate %d in instruction mov" + n + | s -> + Warn.fatal + "illegal shift operand %s in in instruction mov" + (pp_barrel_shift "," s pp_imm) + end + >>= fun v -> write_reg_scalable_rep r v ii + + let index r v1 v2 ii = + let nelem = scalable_nelem r in + let esize = scalable_esize r in + let increment cur_val idx o = + M.add v1 o >>= fun v -> scalable_setlane cur_val idx esize v in + let rec reduce n op = + let i = V.op Op.Mul (V.intToV n) v2 in + match n with + | 0 -> op >>= fun old_val -> increment old_val n i + | _ -> reduce (n-1) (op >>= fun old_val -> increment old_val n i) + in + reduce (nelem-1) mzero >>= fun v -> + write_reg_scalable r v ii + (******************************) (* Move constant instructions *) (******************************) @@ -2223,6 +2484,125 @@ module Make let ops = List.mapi op rlist in List.fold_right (>>::) ops (M.unitT [[()]]) + let load_predicated_elem_or_zero_m sz p ma rlist ii = + let r = List.hd rlist in + let nelem = scalable_nelem r in + let psize = predicate_psize r in + let esize = scalable_esize r in + let nregs = List.length rlist in + read_reg_predicate false p ii >>= fun pred -> + get_predicate_any pred psize nelem >>= fun any -> + M.choiceT + any + (ma >>= fun addr -> + let op i r = + let calc_offset idx = (V.intToV ((idx*nregs+i) * MachSize.nbytes sz)) in + let load idx = + let offset = calc_offset idx in + get_predicate_last pred psize idx >>= fun last -> + M.choiceT + last + (M.add addr offset >>= fun addr -> + do_read_mem_ret sz Annot.N aexp Access.VIR addr ii) + mzero >>= fun v -> + M.op1 (Op.LeftShift (idx*esize)) v in + let rec reduce idx op = + match idx with + | 0 -> op >>| load idx >>= fun (v1,v2) -> M.op Op.Or v1 v2 + | _ -> reduce (idx-1) (op >>| load idx >>= fun (v1,v2) -> M.op Op.Or v1 v2) + in + reduce (nelem-1) mzero >>= fun v -> + write_reg_scalable r v ii + in + let ops = List.mapi op rlist in + List.fold_right (>>::) ops (M.unitT [()])) + (let ops = List.map (fun r -> write_reg_scalable r V.zero ii) rlist in + List.fold_right (>>::) ops (M.unitT [()])) + + let store_predicated_elem_or_merge_m sz p ma rlist ii = + let r = List.hd rlist in + let nelem = scalable_nelem r in + let psize = predicate_psize r in + let esize = scalable_esize r in + let nregs = List.length rlist in + read_reg_predicate false p ii >>= fun pred -> + get_predicate_any pred psize nelem >>= fun any -> + M.choiceT + any + (ma >>= fun addr -> + let op i r = + read_reg_scalable true r ii >>= fun v -> + let calc_offset idx = (V.intToV ((idx*nregs+i) * MachSize.nbytes sz)) in + let store idx = + let offset = calc_offset idx in + get_predicate_last pred psize idx >>= fun last -> + M.choiceT + last + (M.add addr offset >>| scalable_getlane v idx esize >>= fun (addr,v) -> + write_mem sz aexp Access.VIR addr v ii) + (M.unitT ()) + in + let rec reduce idx op = + match idx with + | 0 -> store idx >>:: op + | _ -> reduce (idx-1) (store idx >>:: op) + in + reduce (nelem-1) (M.unitT [()]) in + let ops = List.mapi op rlist in + List.fold_right (>>::) ops (M.unitT [[()]])) + (M.unitT [[()]]) + + let load_gather_predicated_elem_or_zero sz p ma mo rs e k ii = + let r = List.hd rs in + let psize = predicate_psize r in + let nelem = scalable_nelem r in + let esize = scalable_esize r in + read_reg_predicate false p ii >>= fun pred -> + get_predicate_any pred psize nelem >>= fun any -> + M.choiceT + any + (ma >>| mo >>= fun (base,offsets) -> + let load idx = + get_predicate_last pred psize idx >>= fun last -> + M.choiceT + last + (scalable_getlane offsets idx esize >>= memext_sext e k >>= fun o -> + M.add o base >>= fun addr -> + do_read_mem_ret sz Annot.N aexp Access.VIR addr ii) + mzero + >>= fun v -> M.op1 (Op.LeftShift (idx*esize)) v + in + let rec reduce idx op = + match idx with + | 0 -> op >>| load idx >>= fun (v1,v2) -> M.op Op.Or v1 v2 + | _ -> reduce (idx-1) (op >>| load idx >>= fun (v1,v2) -> M.op Op.Or v1 v2) + in + reduce (nelem-1) mzero) + mzero >>= fun v -> + write_reg_scalable r v ii + + let store_scatter_predicated_elem_or_merge sz p ma mo rs e k ii = + let r = List.hd rs in + let psize = predicate_psize r in + let nelem = scalable_nelem r in + let esize = scalable_esize r in + read_reg_predicate false p ii >>= fun pred -> + get_predicate_any pred psize nelem >>= fun any -> + M.choiceT + any + (ma >>| mo >>| read_reg_scalable true r ii >>= fun ((base,offsets),v) -> + let op idx = + let store = + (scalable_getlane offsets idx esize >>= memext_sext e k >>= fun o -> + M.add o base) >>| + scalable_getlane v idx esize >>= fun (addr,v) -> + write_mem sz aexp Access.VIR addr v ii in + get_predicate_last pred psize idx >>= fun last -> + M.choiceT last store (M.unitT ()) in + let ops = List.map op (Misc.interval 0 nelem) in + List.fold_right (>>::) ops (M.unitT [()])) + (M.unitT [()]) + (* Data cache operations *) let dc_loc op a ii = let mk_act loc = Act.CMO (AArch64.CMO.DC op,Some loc) in @@ -2566,46 +2946,60 @@ module Make (* Neon operations *) | I_ADDV(var,r1,r2) -> + check_neon inst; !(addv var r1 r2 ii) | I_DUP(r1,var,r2) -> + check_neon inst; !(let sz = tr_variant var in read_reg_ord_sz sz r2 ii >>= fun v -> write_reg_neon_rep (neon_sz r1) r1 v ii) | I_FMOV_TG(_,r1,_,r2) -> + check_neon inst; !(read_reg_neon false r2 ii >>= fun v -> write_reg r1 v ii) | I_MOV_VE(r1,i1,r2,i2) -> + check_neon inst; !(read_reg_neon_elem false r2 i2 ii >>= fun v -> write_reg_neon_elem MachSize.S128 r1 i1 v ii) | I_MOV_FG(r1,i,var,r2) -> + check_neon inst; !(let sz = tr_variant var in read_reg_ord_sz sz r2 ii >>= fun v -> write_reg_neon_elem MachSize.S128 r1 i v ii) | I_MOV_TG(_,r1,r2,i) -> + check_neon inst; !(read_reg_neon_elem false r2 i ii >>= fun v -> write_reg r1 v ii) | I_MOV_V(r1,r2) -> + check_neon inst; !(read_reg_neon false r2 ii >>= fun v -> write_reg_neon r1 v ii) | I_MOV_S(var,r1,r2,i) -> + check_neon inst; !(let sz = tr_simd_variant var in read_reg_neon_elem false r2 i ii >>= fun v -> write_reg_neon_sz sz r1 v ii) | I_MOVI_V(r,k,shift) -> + check_neon inst; !(movi_v r k shift ii) | I_MOVI_S(var,r,k) -> + check_neon inst; !(movi_s var r k ii) | I_EOR_SIMD(r1,r2,r3) -> + check_neon inst; let size = neon_sz r1 in !(simd_op EOR size r1 r2 r3 ii) | I_ADD_SIMD(r1,r2,r3) -> + check_neon inst; let size = neon_sz r1 in !(simd_op ADD size r1 r2 r3 ii) | I_ADD_SIMD_S(r1,r2,r3) -> + check_neon inst; !(simd_op ADD MachSize.Quad r1 r2 r3 ii) (* Neon loads and stores *) | I_LDAP1(rs,i,rA,kr) -> + check_neon inst; !!!(read_reg_ord rA ii >>= fun addr -> (mem_ss (load_elem_ldar MachSize.S128 i) addr rs ii >>| post_kr rA addr kr ii)) @@ -2613,6 +3007,7 @@ module Make | I_LD2(rs,i,rA,kr) | I_LD3(rs,i,rA,kr) | I_LD4(rs,i,rA,kr) -> + check_neon inst; !!!(read_reg_ord rA ii >>= fun addr -> (mem_ss (load_elem MachSize.S128 i) addr rs ii >>| post_kr rA addr kr ii)) @@ -2620,20 +3015,24 @@ module Make | I_LD2R(rs,rA,kr) | I_LD3R(rs,rA,kr) | I_LD4R(rs,rA,kr) -> + check_neon inst; !!!(read_reg_ord rA ii >>= fun addr -> (mem_ss (load_elem_rep MachSize.S128) addr rs ii >>| post_kr rA addr kr ii)) | I_LD1M(rs,rA,kr) -> + check_neon inst; !!(read_reg_ord rA ii >>= fun addr -> (load_m_contigous addr rs ii >>| post_kr rA addr kr ii)) | I_LD2M(rs,rA,kr) | I_LD3M(rs,rA,kr) | I_LD4M(rs,rA,kr) -> + check_neon inst; !!(read_reg_ord rA ii >>= fun addr -> (load_m addr rs ii >>| post_kr rA addr kr ii)) | I_STL1(rs,i,rA,kr) -> + check_neon inst; !!!(read_reg_ord rA ii >>= fun addr -> (mem_ss (store_elem_stlr i) addr rs ii >>| post_kr rA addr kr ii)) @@ -2641,69 +3040,85 @@ module Make | I_ST2(rs,i,rA,kr) | I_ST3(rs,i,rA,kr) | I_ST4(rs,i,rA,kr) -> + check_neon inst; !!!(read_reg_ord rA ii >>= fun addr -> (mem_ss (store_elem i) addr rs ii >>| post_kr rA addr kr ii)) | I_ST1M(rs,rA,kr) -> + check_neon inst; !!!!(read_reg_ord rA ii >>= fun addr -> (store_m_contigous addr rs ii >>| post_kr rA addr kr ii)) | I_ST2M(rs,rA,kr) | I_ST3M(rs,rA,kr) | I_ST4M(rs,rA,kr) -> + check_neon inst; !!!!(read_reg_ord rA ii >>= fun addr -> (store_m addr rs ii >>| post_kr rA addr kr ii)) | I_LDR_SIMD(var,r1,rA,MemExt.Reg(v,kr,sext,s)) -> + check_neon inst; let access_size = tr_simd_variant var in get_ea_reg rA v kr sext s ii >>= fun addr -> simd_ldr access_size addr r1 ii >>= B.next1T | I_LDR_SIMD(var,r1,rA,MemExt.Imm (k,Idx)) -> + check_neon inst; let access_size = tr_simd_variant var in get_ea_idx rA k ii >>= fun addr -> simd_ldr access_size addr r1 ii >>= B.next1T | I_LDR_SIMD(var,r1,rA,MemExt.Imm (k,PreIdx)) -> + check_neon inst; let access_size = tr_simd_variant var in get_ea_preindexed rA k ii >>= fun addr -> simd_ldr access_size addr r1 ii >>= B.next1T | I_LDR_SIMD(var,r1,rA,MemExt.Imm (k,PostIdx)) -> + check_neon inst; let access_size = tr_simd_variant var in read_reg_ord rA ii >>= fun addr -> simd_ldr access_size addr r1 ii >>| post_kr rA addr (K k) ii >>= B.next2T | I_LDUR_SIMD(var,r1,rA,k) -> + check_neon inst; let access_size = tr_simd_variant var in get_ea rA (K k) S_NOEXT ii >>= fun addr -> simd_ldr access_size addr r1 ii >>= B.next1T | I_LDAPUR_SIMD(var,r1,rA,k) -> + check_neon inst; let access_size = tr_simd_variant var in get_ea rA (K k) S_NOEXT ii >>= fun addr -> simd_ldar access_size addr r1 ii >>= B.next1T | I_STR_SIMD(var,r1,rA,MemExt.Reg (v,kr,sext,s)) -> + check_neon inst; let access_size = tr_simd_variant var in let ma = get_ea_reg rA v kr sext s ii in simd_str access_size ma r1 ii | I_STR_SIMD(var,r1,rA,MemExt.Imm (k,Idx)) -> + check_neon inst; let access_size = tr_simd_variant var in let ma = get_ea_idx rA k ii in simd_str access_size ma r1 ii | I_STR_SIMD(var,r1,rA,MemExt.Imm (k,PreIdx)) -> + check_neon inst; let access_size = tr_simd_variant var in let ma = get_ea_preindexed rA k ii in simd_str access_size ma r1 ii | I_STR_SIMD(var,r1,rA,MemExt.Imm (k,PostIdx)) -> + check_neon inst; let access_size = tr_simd_variant var in let ma = read_reg_ord rA ii in simd_str_p access_size ma r1 rA (K k) ii | I_STUR_SIMD(var,r1,rA,k) -> + check_neon inst; let access_size = tr_simd_variant var in let ma = get_ea_idx rA k ii in simd_str access_size ma r1 ii | I_STLUR_SIMD(var,r1,rA,k) -> + check_neon inst; let access_size = tr_simd_variant var in let ma = get_ea_idx rA k ii in simd_stlr access_size ma r1 ii | I_LDP_SIMD(tnt,var,r1,r2,r3,idx) -> + check_neon inst; begin match idx with | k,Idx -> @@ -2719,6 +3134,7 @@ module Make fun (b,()) -> M.unitT b end | I_STP_SIMD(tnt,var,r1,r2,r3,idx) -> + check_neon inst; begin match idx with | k,Idx -> @@ -2733,6 +3149,111 @@ module Make post_kr r3 addr (K k) ii) >>= fun (b,()) -> M.unitT b end + (* Scalable vector instructions *) + | I_LD1SP(var,rs,p,rA,MemExt.Imm (k,Idx)) + | I_LD2SP(var,rs,p,rA,MemExt.Imm (k,Idx)) + | I_LD3SP(var,rs,p,rA,MemExt.Imm (k,Idx)) + | I_LD4SP(var,rs,p,rA,MemExt.Imm (k,Idx)) -> + check_sve inst; + !!!(let sz = tr_simd_variant var in + let ma = get_ea_idx rA k ii in + load_predicated_elem_or_zero_m sz p ma rs ii >>| + M.unitT ()) + | I_LD1SP(var,rs,p,rA,MemExt.Reg (V64,rM,MemExt.LSL,s)) + | I_LD2SP(var,rs,p,rA,MemExt.Reg (V64,rM,MemExt.LSL,s)) + | I_LD3SP(var,rs,p,rA,MemExt.Reg (V64,rM,MemExt.LSL,s)) + | I_LD4SP(var,rs,p,rA,MemExt.Reg (V64,rM,MemExt.LSL,s)) -> + check_sve inst; + !!!(let sz = tr_simd_variant var in + let ma = get_ea_reg rA V64 rM MemExt.LSL s ii in + load_predicated_elem_or_zero_m sz p ma rs ii >>| + M.unitT ()) + | I_LD1SP (var,rs,p,rA,MemExt.ZReg (rM,sext,s)) -> + check_sve inst; + !(let sz = tr_simd_variant var in + let ma = read_reg_ord rA ii in + let mo = read_reg_scalable false rM ii in + load_gather_predicated_elem_or_zero sz p ma mo rs sext s ii) + | I_ST1SP(var,rs,p,rA,MemExt.Imm (k,Idx)) + | I_ST2SP(var,rs,p,rA,MemExt.Imm (k,Idx)) + | I_ST3SP(var,rs,p,rA,MemExt.Imm (k,Idx)) + | I_ST4SP(var,rs,p,rA,MemExt.Imm (k,Idx)) -> + check_sve inst; + !!!!(let sz = tr_simd_variant var in + let ma = get_ea_idx rA k ii in + store_predicated_elem_or_merge_m sz p ma rs ii >>| + M.unitT ()) + | I_ST1SP(var,rs,p,rA,MemExt.Reg (V64,rM,MemExt.LSL,s)) + | I_ST2SP(var,rs,p,rA,MemExt.Reg (V64,rM,MemExt.LSL,s)) + | I_ST3SP(var,rs,p,rA,MemExt.Reg (V64,rM,MemExt.LSL,s)) + | I_ST4SP(var,rs,p,rA,MemExt.Reg (V64,rM,MemExt.LSL,s)) -> + check_sve inst; + !!!!(let sz = tr_simd_variant var in + let ma = get_ea_reg rA V64 rM MemExt.LSL s ii in + store_predicated_elem_or_merge_m sz p ma rs ii >>| + M.unitT ()) + | I_ST1SP (var,rs,p,rA,MemExt.ZReg (rM,sext,s)) -> + check_sve inst; + !!!(let sz = tr_simd_variant var in + let ma = read_reg_ord rA ii in + let mo = read_reg_scalable false rM ii in + store_scatter_predicated_elem_or_merge sz p ma mo rs sext s ii >>| + M.unitT ()) + | I_PTRUE(p,pattern) -> + check_sve inst; + !(ptrue p pattern ii) + | I_WHILELT(p,var,r1,r2) -> + check_sve inst; + !!(while_op (M.op Op.Lt) false p var r1 r2 ii) + | I_WHILELO(p,var,r1,r2) -> + check_sve inst; + !!(while_op (M.op Op.Lt) true p var r1 r2 ii) + | I_WHILELE(p,var,r1,r2) -> + check_sve inst; + !!(while_op (M.op Op.Le) false p var r1 r2 ii) + | I_WHILELS(p,var,r1,r2) -> + check_sve inst; + !!(while_op (M.op Op.Le) true p var r1 r2 ii) + | I_ADD_SV (r1,r2,r3) -> + check_sve inst; + !(read_reg_scalable false r3 ii >>| + read_reg_scalable false r2 ii >>= fun (v1,v2) -> + M.add v1 v2 >>= fun v -> + write_reg_scalable r1 v ii) + | I_UADDV(var,v,p,z) -> + check_sve inst; + !(uaddv var v p z ii) + | I_MOV_SV(r,k,shift) -> + check_sve inst; + !(mov_sv r k shift ii) + | I_DUP_SV(r1,var,r2) -> + check_sve inst; + !(let sz = tr_variant var in + read_reg_ord_sz sz r2 ii >>= fun v -> + write_reg_scalable_rep r1 v ii) + | I_INDEX_SI (r1,var,r2,k) -> + check_sve inst; + !(let sz = tr_variant var in + let v2 = V.intToV k in + read_reg_ord_sz sz r2 ii >>= fun v1 -> + index r1 v1 v2 ii) + | I_INDEX_IS (r1,var,k,r2) -> + check_sve inst; + !(let sz = tr_variant var in + let v1 = V.intToV k in + read_reg_ord_sz sz r2 ii >>= fun v2 -> + index r1 v1 v2 ii) + | I_INDEX_SS (r1,var,r2,r3) -> + check_sve inst; + !(let sz = tr_variant var in + read_reg_ord_sz sz r2 ii >>| + read_reg_ord_sz sz r3 ii >>= fun (v1,v2) -> + index r1 v1 v2 ii) + | I_INDEX_II (r1,k1,k2) -> + check_sve inst; + !(let v1 = V.intToV k1 in + let v2 = V.intToV k2 in + index r1 v1 v2 ii) (* Morello instructions *) | I_ALIGND(rd,rn,k) -> check_morello inst ; @@ -3136,7 +3657,10 @@ module Make m_fault >>| set_elr_el1 lbl_v ii >>! B.Fault [AArch64Base.elr_el1, lbl_v] (* Cannot handle *) (* | I_BL _|I_BLR _|I_BR _|I_RET _ *) - | (I_STG _|I_STZG _|I_STZ2G _) as i -> + | (I_STG _|I_STZG _|I_STZ2G _ + | I_LDR_SIMD _| I_STR_SIMD _ + | I_LD1SP _| I_LD2SP _| I_LD3SP _| I_LD4SP _ + | I_ST1SP _|I_ST2SP _|I_ST3SP _|I_ST4SP _) as i -> Warn.fatal "illegal instruction: %s" (AArch64.dump_instruction i) (* Compute a safe set of instructions that can diff --git a/herd/libdir/aarch64util.cat b/herd/libdir/aarch64util.cat index b117f3f9f2..50df3229c0 100644 --- a/herd/libdir/aarch64util.cat +++ b/herd/libdir/aarch64util.cat @@ -132,4 +132,8 @@ flag ~empty (if "vmsa" && "ifetch" then _ else 0) flag ~empty (if "memtag" && "ifetch" then _ else 0) as combining-memtag-and-ifetch-is-not-supported +(* Flag the runs of herd7 with work-in-progress features *) +flag ~empty (if "sve" then _ else 0) + as Scalable-Vector-Extensions-is-work-in-progress + include "aarch64loc.cat" diff --git a/herd/tests/instructions/AArch64.sve/V01.litmus b/herd/tests/instructions/AArch64.sve/V01.litmus new file mode 100644 index 0000000000..c28c2ef202 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V01.litmus @@ -0,0 +1,13 @@ +AArch64 V01 +(* Add vectors (unpredicated) *) +{ +} +P0 ; + MOV Z0.S, #1 ; + MOV Z1.S, #2 ; + ADD Z2.S,Z1.S,Z0.S ; + MOV Z3.D, #3 ; + MOV Z4.D, #4 ; + ADD Z5.D,Z4.D,Z3.D ; + +forall 0:V2.4S={3,3,3,3} /\ 0:V5.4S={7,0,7,0} diff --git a/herd/tests/instructions/AArch64.sve/V01.litmus.expected b/herd/tests/instructions/AArch64.sve/V01.litmus.expected new file mode 100644 index 0000000000..20f7a91980 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V01.litmus.expected @@ -0,0 +1,11 @@ +Test V01 Required +States 1 +0:V2.4S={3,3,3,3}; 0:V5.4S={7,0,7,0}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V2.4S={3,3,3,3} /\ 0:V5.4S={7,0,7,0}) +Observation V01 Always 1 0 +Hash=dd05dffbd617d9f8658242b89d30bec8 + diff --git a/herd/tests/instructions/AArch64.sve/V02.litmus b/herd/tests/instructions/AArch64.sve/V02.litmus new file mode 100644 index 0000000000..d5ff66ef60 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V02.litmus @@ -0,0 +1,10 @@ +AArch64 V02 +(* Broadcast general-purpose register to vector elements (unpredicated) *) +{ +} +P0 ; + MOV W0,#4 ; + DUP Z0.H, W0 ; + DUP Z1.S, W0 ; + +forall 0:V0.4S={0x40004,0x40004,0x40004,0x40004} /\ 0:V1.4S={4,4,4,4} diff --git a/herd/tests/instructions/AArch64.sve/V02.litmus.expected b/herd/tests/instructions/AArch64.sve/V02.litmus.expected new file mode 100644 index 0000000000..294fff1644 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V02.litmus.expected @@ -0,0 +1,11 @@ +Test V02 Required +States 1 +0:V0.4S={262148,262148,262148,262148}; 0:V1.4S={4,4,4,4}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V0.4S={262148,262148,262148,262148} /\ 0:V1.4S={4,4,4,4}) +Observation V02 Always 1 0 +Hash=de76ac983b66dc71631de247107dea60 + diff --git a/herd/tests/instructions/AArch64.sve/V03.litmus b/herd/tests/instructions/AArch64.sve/V03.litmus new file mode 100644 index 0000000000..966ae192a1 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V03.litmus @@ -0,0 +1,13 @@ +AArch64 V03 +(* Initialise predicate from named constraint *) +{ + int x[4]; + + 0:X0=x; +} +P0 ; + PTRUE P0.S,VL3 ; + MOV Z0.S,#1 ; + ST1W {Z0.S},P0,[X0]; + +forall(x = {1,1,1,0}) diff --git a/herd/tests/instructions/AArch64.sve/V03.litmus.expected b/herd/tests/instructions/AArch64.sve/V03.litmus.expected new file mode 100644 index 0000000000..7cf15e89b7 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V03.litmus.expected @@ -0,0 +1,11 @@ +Test V03 Required +States 1 +x={1,1,1,0}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (x={1,1,1,0}) +Observation V03 Always 1 0 +Hash=4d6c70541c7e2576a5148e43c261e804 + diff --git a/herd/tests/instructions/AArch64.sve/V04.litmus b/herd/tests/instructions/AArch64.sve/V04.litmus new file mode 100644 index 0000000000..6deed91ebf --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V04.litmus @@ -0,0 +1,13 @@ +AArch64 V04 +(* Contiguous load unsigned bytes to vector (scalar index), 32-bit element *) +{ + uint8_t x[4] = {1,2,3,4}; + + 0:X0=x; +} +P0 ; + MOV X1,#0 ; + PTRUE P0.S,VL4 ; + LD1B {Z1.S},P0/Z,[X0,X1] ; + +forall(0:V1.4S = {1,2,3,4}) diff --git a/herd/tests/instructions/AArch64.sve/V04.litmus.expected b/herd/tests/instructions/AArch64.sve/V04.litmus.expected new file mode 100644 index 0000000000..58a584dc99 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V04.litmus.expected @@ -0,0 +1,11 @@ +Test V04 Required +States 1 +0:V1.4S={1,2,3,4}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V1.4S={1,2,3,4}) +Observation V04 Always 1 0 +Hash=6c05c2f57e2f2a230130f55ecf9803c1 + diff --git a/herd/tests/instructions/AArch64.sve/V05.litmus b/herd/tests/instructions/AArch64.sve/V05.litmus new file mode 100644 index 0000000000..8dab731b98 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V05.litmus @@ -0,0 +1,13 @@ +AArch64 V05 +(* Contiguous load two-word structures to two vectors (scalar index) *) +{ + uint16_t x[8] = {1,2,0,0,0,0,0,0}; + + 0:X0=x; +} +P0 ; + MOV X2,#0 ; + PTRUE P0.H,VL1 ; + LD2H {Z0.H,Z1.H},P0/Z,[X0,X2,LSL #1] ; + +forall(0:V0.4S = {1,0,0,0} /\ 0:V1.4S = {2,0,0,0}) diff --git a/herd/tests/instructions/AArch64.sve/V05.litmus.expected b/herd/tests/instructions/AArch64.sve/V05.litmus.expected new file mode 100644 index 0000000000..77c4088ab9 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V05.litmus.expected @@ -0,0 +1,11 @@ +Test V05 Required +States 1 +0:V0.4S={1,0,0,0}; 0:V1.4S={2,0,0,0}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V0.4S={1,0,0,0} /\ 0:V1.4S={2,0,0,0}) +Observation V05 Always 1 0 +Hash=82b3428acf9418cdbdbcd1cac3a5204b + diff --git a/herd/tests/instructions/AArch64.sve/V06.litmus b/herd/tests/instructions/AArch64.sve/V06.litmus new file mode 100644 index 0000000000..9d5a1cba2a --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V06.litmus @@ -0,0 +1,13 @@ +AArch64 V06 +(* Contiguous load three-word structures to three vectors (scalar index) *) +{ + int x[12] = {1,2,3,1,2,3,1,2,3,1,2,3}; + + 0:X0=x; +} +P0 ; + MOV X2,#0 ; + PTRUE P0.S,VL4 ; + LD3W {Z0.S,Z1.S,Z2.S},P0/Z,[X0,X2,LSL #2] ; + +forall(0:V0.4S = {1,1,1,1} /\ 0:V1.4S = {2,2,2,2} /\ 0:V2.4S = {3,3,3,3}) diff --git a/herd/tests/instructions/AArch64.sve/V06.litmus.expected b/herd/tests/instructions/AArch64.sve/V06.litmus.expected new file mode 100644 index 0000000000..2e43efd960 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V06.litmus.expected @@ -0,0 +1,11 @@ +Test V06 Required +States 1 +0:V0.4S={1,1,1,1}; 0:V1.4S={2,2,2,2}; 0:V2.4S={3,3,3,3}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V0.4S={1,1,1,1} /\ 0:V1.4S={2,2,2,2} /\ 0:V2.4S={3,3,3,3}) +Observation V06 Always 1 0 +Hash=0b0d64e1c8f343cdce7976205bc08f2b + diff --git a/herd/tests/instructions/AArch64.sve/V07.litmus b/herd/tests/instructions/AArch64.sve/V07.litmus new file mode 100644 index 0000000000..42c7395a3c --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V07.litmus @@ -0,0 +1,12 @@ +AArch64 V07 +(* Contiguous load four-word structures to four vectors (scalar index) *) +{ + uint64_t x[8] = {1,2,3,4,1,2,3,4}; + + 0:X0=x; +} +P0 ; + MOV X2,#0 ; + PTRUE P0.D,VL2 ; + LD4D {Z0.D,Z1.D,Z2.D,Z3.D},P0/Z,[X0,X2,LSL #3]; +forall(0:V0.4S = {1,0,1,0} /\ 0:V1.4S = {2,0,2,0} /\ 0:V2.4S = {3,0,3,0} /\ 0:V3.4S = {4,0,4,0}) diff --git a/herd/tests/instructions/AArch64.sve/V07.litmus.expected b/herd/tests/instructions/AArch64.sve/V07.litmus.expected new file mode 100644 index 0000000000..b2312ff2b9 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V07.litmus.expected @@ -0,0 +1,11 @@ +Test V07 Required +States 1 +0:V0.4S={1,0,1,0}; 0:V1.4S={2,0,2,0}; 0:V2.4S={3,0,3,0}; 0:V3.4S={4,0,4,0}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V0.4S={1,0,1,0} /\ 0:V1.4S={2,0,2,0} /\ 0:V2.4S={3,0,3,0} /\ 0:V3.4S={4,0,4,0}) +Observation V07 Always 1 0 +Hash=b6883bda119e7c00f9493ab4b2a289a8 + diff --git a/herd/tests/instructions/AArch64.sve/V08.litmus b/herd/tests/instructions/AArch64.sve/V08.litmus new file mode 100644 index 0000000000..856ff5b817 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V08.litmus @@ -0,0 +1,9 @@ +AArch64 V08 +(* Broadcast signed immediate to vector elements (unpredicated) *) +{ +} +P0 ; + MOV Z0.S,#4 ; + MOV Z1.B,#1 ; + +forall 0:V0.4S={4,4,4,4} /\ 0:V1.4S={0x1010101,0x1010101,0x1010101,0x1010101} diff --git a/herd/tests/instructions/AArch64.sve/V08.litmus.expected b/herd/tests/instructions/AArch64.sve/V08.litmus.expected new file mode 100644 index 0000000000..fa1a3572c4 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V08.litmus.expected @@ -0,0 +1,11 @@ +Test V08 Required +States 1 +0:V0.4S={4,4,4,4}; 0:V1.4S={16843009,16843009,16843009,16843009}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V0.4S={4,4,4,4} /\ 0:V1.4S={16843009,16843009,16843009,16843009}) +Observation V08 Always 1 0 +Hash=325848570958ac8e2b931cf1c48564dd + diff --git a/herd/tests/instructions/AArch64.sve/V09.litmus b/herd/tests/instructions/AArch64.sve/V09.litmus new file mode 100644 index 0000000000..535dca1234 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V09.litmus @@ -0,0 +1,14 @@ +AArch64 V09 +(* Contiguous store words from vector (scalar index) *) +{ + uint8_t x[4]; + + 0:X0=x; +} +P0 ; + MOV Z1.S,#1 ; + MOV X1,#0 ; + PTRUE P0.H,VL4 ; + ST1B {Z1.H},P0,[X0,X1] ; + +forall x={1,0,1,0} diff --git a/herd/tests/instructions/AArch64.sve/V09.litmus.expected b/herd/tests/instructions/AArch64.sve/V09.litmus.expected new file mode 100644 index 0000000000..30792c89cf --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V09.litmus.expected @@ -0,0 +1,11 @@ +Test V09 Required +States 1 +x={1,0,1,0}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (x={1,0,1,0}) +Observation V09 Always 1 0 +Hash=b5926a609acc60970724fd0fd2512677 + diff --git a/herd/tests/instructions/AArch64.sve/V10.litmus b/herd/tests/instructions/AArch64.sve/V10.litmus new file mode 100644 index 0000000000..c826f61ad9 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V10.litmus @@ -0,0 +1,15 @@ +AArch64 V10 +(* Contiguous store two-word structures from two vectors (scalar index) *) +{ + uint16_t x[8]; + + 0:X0=x; +} +P0 ; + MOV Z1.H,#1 ; + MOV Z2.H,#2 ; + MOV X1,#0 ; + PTRUE P0.H,VL4 ; + ST2H {Z1.H,Z2.H},P0,[X0,X1, LSL #1] ; + +forall x={1,2,1,2,1,2,1,2} diff --git a/herd/tests/instructions/AArch64.sve/V10.litmus.expected b/herd/tests/instructions/AArch64.sve/V10.litmus.expected new file mode 100644 index 0000000000..9b094a5482 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V10.litmus.expected @@ -0,0 +1,11 @@ +Test V10 Required +States 1 +x={1,2,1,2,1,2,1,2}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (x={1,2,1,2,1,2,1,2}) +Observation V10 Always 1 0 +Hash=b9d08a35b5381eeb0056a1674df33653 + diff --git a/herd/tests/instructions/AArch64.sve/V11.litmus b/herd/tests/instructions/AArch64.sve/V11.litmus new file mode 100644 index 0000000000..2e8922659d --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V11.litmus @@ -0,0 +1,16 @@ +AArch64 V11 +(* Contiguous store three-word structures from three vectors (scalar index) *) +{ + int x[12]; + + 0:X0=x; +} +P0 ; + MOV Z1.S,#1 ; + MOV Z2.S,#2 ; + MOV Z3.S,#3 ; + MOV X1,#0 ; + PTRUE P0.S,VL4 ; + ST3W {Z1.S,Z2.S,Z3.S},P0,[X0,X1, LSL #2]; + +forall x={1,2,3,1,2,3,1,2,3,1,2,3} diff --git a/herd/tests/instructions/AArch64.sve/V11.litmus.expected b/herd/tests/instructions/AArch64.sve/V11.litmus.expected new file mode 100644 index 0000000000..3ae3f517f0 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V11.litmus.expected @@ -0,0 +1,11 @@ +Test V11 Required +States 1 +x={1,2,3,1,2,3,1,2,3,1,2,3}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (x={1,2,3,1,2,3,1,2,3,1,2,3}) +Observation V11 Always 1 0 +Hash=a3c15b9b49c3645ba9113a9354c7a46a + diff --git a/herd/tests/instructions/AArch64.sve/V12.litmus b/herd/tests/instructions/AArch64.sve/V12.litmus new file mode 100644 index 0000000000..c7875b67b1 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V12.litmus @@ -0,0 +1,17 @@ +AArch64 V12 +(* Contiguous store four-word structures from four vectors (scalar index) *) +{ + uint64_t x[8]; + + 0:X0=x; +} +P0 ; + MOV Z1.D,#1 ; + MOV Z2.D,#2 ; + MOV Z3.D,#3 ; + MOV Z4.D,#4 ; + MOV X1,#0 ; + PTRUE P0.D,VL2 ; + ST4D {Z1.D,Z2.D,Z3.D,Z4.D},P0,[X0,X1, LSL #3] ; + +forall x={1,2,3,4,1,2,3,4} diff --git a/herd/tests/instructions/AArch64.sve/V12.litmus.expected b/herd/tests/instructions/AArch64.sve/V12.litmus.expected new file mode 100644 index 0000000000..84a3a3970d --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V12.litmus.expected @@ -0,0 +1,11 @@ +Test V12 Required +States 1 +x={1,2,3,4,1,2,3,4}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (x={1,2,3,4,1,2,3,4}) +Observation V12 Always 1 0 +Hash=f059652d76c3a984d2cb9d3649b83389 + diff --git a/herd/tests/instructions/AArch64.sve/V13.litmus b/herd/tests/instructions/AArch64.sve/V13.litmus new file mode 100644 index 0000000000..df817f62ba --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V13.litmus @@ -0,0 +1,10 @@ +AArch64 V13 +(* Unsigned add reduction to scalar *) +{ +} +P0 ; + MOV Z0.S,#4 ; + PTRUE P0.S,VL3 ; + UADDV D1,P0,Z0.S ; + +forall 0:V1.4S={12,0,0,0} diff --git a/herd/tests/instructions/AArch64.sve/V13.litmus.expected b/herd/tests/instructions/AArch64.sve/V13.litmus.expected new file mode 100644 index 0000000000..14bf8976ac --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V13.litmus.expected @@ -0,0 +1,11 @@ +Test V13 Required +States 1 +0:V1.4S={12,0,0,0}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V1.4S={12,0,0,0}) +Observation V13 Always 1 0 +Hash=5e391bba5dda1e4a4f1f0a45c2c48975 + diff --git a/herd/tests/instructions/AArch64.sve/V14.litmus b/herd/tests/instructions/AArch64.sve/V14.litmus new file mode 100644 index 0000000000..8510ae7cfc --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V14.litmus @@ -0,0 +1,12 @@ +AArch64 V14 +(* INDEX *) +{ +} +P0 ; + MOV W0,#2 ; + INDEX Z0.S,W0,#3 ; + INDEX Z1.S,#0,#1 ; + INDEX Z2.H,W0,W0 ; + INDEX Z3.D,#1,#2 ; + +forall 0:V0.4S={2,5,8,11} /\ 0:V1.4S={0,1,2,3} /\ 0:V2.4S={0x40002,0x80006,0xc000a,0x10000e} /\ 0:V3.4S={1,0,3,0} diff --git a/herd/tests/instructions/AArch64.sve/V14.litmus.expected b/herd/tests/instructions/AArch64.sve/V14.litmus.expected new file mode 100644 index 0000000000..91975537e0 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V14.litmus.expected @@ -0,0 +1,11 @@ +Test V14 Required +States 1 +0:V0.4S={2,5,8,11}; 0:V1.4S={0,1,2,3}; 0:V2.4S={262146,524294,786442,1048590}; 0:V3.4S={1,0,3,0}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V0.4S={2,5,8,11} /\ 0:V1.4S={0,1,2,3} /\ 0:V2.4S={262146,524294,786442,1048590} /\ 0:V3.4S={1,0,3,0}) +Observation V14 Always 1 0 +Hash=93b29826278f620a466dfd3446e3f014 + diff --git a/herd/tests/instructions/AArch64.sve/V15.litmus b/herd/tests/instructions/AArch64.sve/V15.litmus new file mode 100644 index 0000000000..d2ac5fbf5e --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V15.litmus @@ -0,0 +1,35 @@ +AArch64 V15 +(* WHILELT (predicate) *) +{ +} +P0 ; + MOV W0,#0 ; + MOV W1,#0 ; + WHILELT P0.S,W0,W1 ; (* No active elemenets *) + B.EQ L0 ; (* All Active elements were FALSE or there were no Active elements. Z == 1 *) + MOV W2,#1 ; +L0: ; + B.CS L1 ; (* The Last active element was FALSE or there were no Active elements. C == 1 *) + MOV W2,#2 ; +L1: ; + B.PL L3 ; (* The First active element was FALSE or there were no Active elements. N == 0 *) + MOV W2,#3 ; +L3: + MOV W1,#1 ; (* First element is active but not Last *) + WHILELT P1.S,W0,W1 ; + B.NE L4 ; (* An Active element was TRUE. Z == 0 *) + MOV W2,#4 ; +L4: ; + B.MI L5 ; (* The First active element was TRUE. N == 1 *) + MOV W2,#5 ; +L5: ; + B.HI L6 ; (* An Active element was TRUE, but the Last active element was FALSE. C ==1 && Z == 0 *) + MOV W2,#6 ; +L6: ; + MOV W1,#64 ; + WHILELT P2.S,W0,W1 ; (* All elements are active including Last *) + B.CC END ; (* The Last active element was TRUE. C == 0 *) + MOV W2,#7 ; +END: ; + +forall 0:X2=0 diff --git a/herd/tests/instructions/AArch64.sve/V15.litmus.expected b/herd/tests/instructions/AArch64.sve/V15.litmus.expected new file mode 100644 index 0000000000..ed0c94278c --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V15.litmus.expected @@ -0,0 +1,11 @@ +Test V15 Required +States 1 +0:X2=0; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:X2=0) +Observation V15 Always 1 0 +Hash=a5e517ea61f31faa909e892130e6b597 + diff --git a/herd/tests/instructions/AArch64.sve/V16.litmus b/herd/tests/instructions/AArch64.sve/V16.litmus new file mode 100644 index 0000000000..2885de0684 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V16.litmus @@ -0,0 +1,15 @@ +AArch64 V16 +{ + int x[4]={0,1,2,3}; + int y[4]={5,6,7,8}; + 0:X0=x; + 0:X1=y; +} +P0 ; +MOV W2,#0 ; +MOV W3,#4 ; +WHILELT P0.S,W2,W3 ; +LD1W {Z1.S},P0/Z,[X0] ; +LD1W {Z2.S},P0/Z,[X1,Z1.S,UXTW #2]; +forall 0:V1.4S={0,1,2,3} /\ 0:V2.4S={5,6,7,8} + diff --git a/herd/tests/instructions/AArch64.sve/V16.litmus.expected b/herd/tests/instructions/AArch64.sve/V16.litmus.expected new file mode 100644 index 0000000000..60414f1b29 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V16.litmus.expected @@ -0,0 +1,11 @@ +Test V16 Required +States 1 +0:V1.4S={0,1,2,3}; 0:V2.4S={5,6,7,8}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V1.4S={0,1,2,3} /\ 0:V2.4S={5,6,7,8}) +Observation V16 Always 1 0 +Hash=97b55b740335168c5e12a073693d253a + diff --git a/herd/tests/instructions/AArch64.sve/V17.litmus b/herd/tests/instructions/AArch64.sve/V17.litmus new file mode 100644 index 0000000000..8c64d0c15d --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V17.litmus @@ -0,0 +1,18 @@ +AArch64 V17 +{ + int x[4]; + int y[4]; + 0:X0=x; + 0:X1=y +} +P0 ; +MOV W2,#0 ; +MOV W3,#4 ; +WHILELT P0.S,W2,W3 ; +MOV Z0.S,#4 ; +ST1W {Z0.S},P0,[X0] ; +INDEX Z1.S,W2,#1 ; +ST1W {Z1.S},P0,[X1,Z1.S,UXTW #2] ; + +forall x={4,4,4,4} /\ y={0,1,2,3} + diff --git a/herd/tests/instructions/AArch64.sve/V17.litmus.expected b/herd/tests/instructions/AArch64.sve/V17.litmus.expected new file mode 100644 index 0000000000..954794888e --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V17.litmus.expected @@ -0,0 +1,11 @@ +Test V17 Required +States 1 +x={4,4,4,4}; y={0,1,2,3}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (x={4,4,4,4} /\ y={0,1,2,3}) +Observation V17 Always 1 0 +Hash=f50938e525aa0cec3773dbec42193ada + diff --git a/herd/tests/instructions/AArch64.sve/V18.litmus b/herd/tests/instructions/AArch64.sve/V18.litmus new file mode 100644 index 0000000000..c23b58bba3 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V18.litmus @@ -0,0 +1,11 @@ +AArch64 V18 +{ + int x[8] = {1,2,1,2,1,2,1,2}; + + 0:X0=x; +} +P0 ; + PTRUE P0.S,VL4 ; + LD2W {Z0.S,Z1.S},P0/Z,[X0] ; + +forall 0:V0.4S = {1,1,1,1} /\ 0:V1.4S = {2,2,2,2} diff --git a/herd/tests/instructions/AArch64.sve/V18.litmus.expected b/herd/tests/instructions/AArch64.sve/V18.litmus.expected new file mode 100644 index 0000000000..6f9dc5ed96 --- /dev/null +++ b/herd/tests/instructions/AArch64.sve/V18.litmus.expected @@ -0,0 +1,11 @@ +Test V18 Required +States 1 +0:V0.4S={1,1,1,1}; 0:V1.4S={2,2,2,2}; +Ok +Witnesses +Positive: 1 Negative: 0 +Flag Scalable-Vector-Extensions-is-work-in-progress +Condition forall (0:V0.4S={1,1,1,1} /\ 0:V1.4S={2,2,2,2}) +Observation V18 Always 1 0 +Hash=55d8e3d714ba1b661c4ad767c5d3cbee + diff --git a/herd/variant.ml b/herd/variant.ml index 6809bb543e..5c33a1e390 100644 --- a/herd/variant.ml +++ b/herd/variant.ml @@ -46,6 +46,7 @@ type t = | TooFar | Morello | Neon + | SVE (* Branch speculation+ cat computation of dependencies *) | Deps | Instances (* Compute dependencies on instruction instances *) @@ -133,6 +134,7 @@ let parse s = match Misc.lowercase s with | "toofar" -> Some TooFar | "morello" -> Some Morello | "neon" -> Some Neon +| "sve" -> Some SVE | "deps" -> Some Deps | "instances"|"instance" -> Some Instances | "kvm" @@ -210,6 +212,7 @@ let pp = function | TooFar -> "TooFar" | Morello -> "Morello" | Neon -> "Neon" + | SVE -> "sve" | Deps -> "Deps" | Instances -> "Instances" | VMSA -> "vmsa" diff --git a/herd/variant.mli b/herd/variant.mli index 9a704db89f..cf9ede1e90 100644 --- a/herd/variant.mli +++ b/herd/variant.mli @@ -44,6 +44,7 @@ type t = | TooFar (* Do not discard candidates with TooFar events *) | Morello | Neon + | SVE (* Branch speculation+ cat computation of dependencies *) | Deps | Instances (* Compute dependencies on instruction instances *) diff --git a/jingle/AArch64Arch_jingle.ml b/jingle/AArch64Arch_jingle.ml index 1537a7bfd7..9efe7568d2 100644 --- a/jingle/AArch64Arch_jingle.ml +++ b/jingle/AArch64Arch_jingle.ml @@ -271,6 +271,7 @@ include Arch.MakeArch(struct conv_idx idx >! fun idx -> E.Imm idx | E.Reg (v,r,e,k) -> find_cst k >> fun k -> conv_reg r >! fun r -> E.Reg (v,r,e,k) + | _ -> assert false end in @@ -664,4 +665,12 @@ include Arch.MakeArch(struct | I_MOVI_V _ | I_MOVI_S _ | I_EOR_SIMD _ | I_ADD_SIMD _ | I_ADD_SIMD_S _ -> Warn.fatal "Neon instructions are not implemented yet" + (* Scalable Vector Extension *) + | I_WHILELT _ | I_WHILELE _ | I_WHILELO _ | I_WHILELS _ + | I_UADDV _ | I_DUP_SV _ | I_ADD_SV _ + | I_LD1SP _ | I_LD2SP _ | I_LD3SP _ | I_LD4SP _ + | I_ST1SP _ | I_ST2SP _ | I_ST3SP _ | I_ST4SP _ + | I_MOV_SV _ | I_PTRUE _ + | I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _ + -> Warn.fatal "SVE instructions are not implemented yet" end) diff --git a/lib/AArch64Base.ml b/lib/AArch64Base.ml index 4410b9aa16..b919cf3cb6 100644 --- a/lib/AArch64Base.ml +++ b/lib/AArch64Base.ml @@ -46,6 +46,16 @@ type vec_reg = | V24 | V25 | V26 | V27 | V28 | V29 | V30 | V31 +type pred_reg = + | P0 | P1 | P2 | P3 + | P4 | P5 | P6 | P7 + | P8 | P9 | P10 | P11 + | P12 | P13 | P14 | P15 + + +type pred_mode = + | Zero | Merge + let arrange_specifier = [ (1,64),".1D" ; (1,128),".1Q" ; @@ -56,6 +66,16 @@ let arrange_specifier = (0,8),".B" ; (0,16),".H" ; (0,32),".S" ; (0,64),".D"; ] +let sve_arrange_specifier = +[ + 0,"";8,".B"; 16,".H"; 32,".S"; 64,".D"; 128,".Q"; +] + +let sve_pred_modifier = +[ + Zero,"/Z"; Merge,"/M"; +] + (********************) (* System registers *) (* (Some of...) *) @@ -96,6 +116,9 @@ type reg = | Tag of gpr | Vreg of (vec_reg * (int * int)) | SIMDreg of vec_reg + | Zreg of (vec_reg * int) + | Preg of (pred_reg * int) + | PMreg of (pred_reg * pred_mode) | Symbolic_reg of string | Internal of int | NZCV @@ -173,6 +196,18 @@ let vec_regs = let vregs = List.map (fun v -> Vreg (v,(4,32))) vec_regs +let zregs = List.map (fun z -> Zreg (z,32)) vec_regs + +let pred_regs = +[ + P0 ; P1; P2 ; P3; + P4 ; P5; P6 ; P7; + P8 ; P9; P10; P11; + P12; P13; P14; P15; +] + +let pregs = List.map (fun p -> Preg (p,32)) pred_regs + let linkreg = Ireg R30 let elr_el1 = SysReg ELR_EL1 let tfsr = SysReg TFSR_ELx @@ -294,6 +329,26 @@ let qvrs = V28,"Q28" ; V29,"Q29" ; V30,"Q30" ; V31, "Q31"; ] +let zvrs = +[ + V0 ,"Z0"; V1 ,"Z1"; V2 ,"Z2"; V3 ,"Z3"; + V4 ,"Z4"; V5 ,"Z5"; V6 ,"Z6"; V7 ,"Z7"; + V8 ,"Z8"; V9 ,"Z9"; V10,"Z10"; V11,"Z11"; + V12,"Z12"; V13,"Z13"; V14,"Z14"; V15,"Z15"; + V16,"Z16"; V17,"Z17"; V18,"Z18"; V19,"Z19"; + V20,"Z20"; V21,"Z21"; V22,"Z22"; V23,"Z23"; + V24,"Z24"; V25,"Z25"; V26,"Z26"; V27,"Z27"; + V28,"Z28"; V29,"Z29"; V30,"Z30"; V31,"Z31"; +] + +let pvrs = +[ + P0 ,"P0"; P1 ,"P1"; P2 ,"P2"; P3 ,"P3"; + P4 ,"P4"; P5 ,"P5"; P6 ,"P6"; P7 ,"P7"; + P8 ,"P8"; P9 ,"P9"; P10,"P10"; P11,"P11"; + P12,"P12"; P13,"P13"; P14,"P14"; P15,"P15"; +] + let simd_regs = let rs = bvrs @ hvrs @ svrs @ dvrs @ qvrs in List.map (fun (r,s) -> s,SIMDreg r) rs @@ -333,6 +388,37 @@ let parse_vreg = let parse_simd_reg = parse_some simd_regs +let parse_zreg = + let zplist = parse_list zvrs + and arplist = parse_list sve_arrange_specifier in + fun s -> + try let (g1, g2) = + ignore (Str.search_forward (Str.regexp "\\(Z[0-9]+\\)\\(\\.[B,D,Q,H,S]\\)") (Misc.uppercase s) 0); + (Str.matched_group 1 s, Str.matched_group 2 s); + in Some (Zreg (List.assoc g1 zplist, List.assoc g2 arplist)) + with Not_found -> None + +let parse_preg = + let pplist = parse_list pvrs + and splist = parse_list sve_arrange_specifier in + fun s -> + try let (g1, g2) = + ignore (Str.search_forward (Str.regexp "\\(P[0-9]+\\)\\(\\.[B,D,Q,H,S]\\)?") (Misc.uppercase s) 0); + let suffix = try Str.matched_group 2 s with Not_found -> "" in + (Str.matched_group 1 s, suffix); + in Some (Preg (List.assoc g1 pplist, List.assoc g2 splist)) + with Not_found -> None + +let parse_pmreg = + let pplist = parse_list pvrs + and mplist = parse_list sve_pred_modifier in + fun s -> + try let (g1, g2) = + ignore (Str.search_forward (Str.regexp "\\(P[0-9]+\\)\\(\\/[Z,M]\\)") (Misc.uppercase s) 0); + (Str.matched_group 1 s, Str.matched_group 2 s); + in Some (PMreg (List.assoc g1 pplist, List.assoc g2 mplist)) + with Not_found -> None + let parse_reg s = match parse_vreg s with | Some v -> Some v | None -> @@ -369,9 +455,32 @@ let pp_simd_scalar_reg rl r = match r with | SIMDreg r -> (try List.assoc r rl with Not_found -> assert false) | _ -> assert false +let pp_zreg r = match r with +| Zreg (r',s) -> + (try List.assoc r' zvrs with Not_found -> assert false) ^ + (try List.assoc s sve_arrange_specifier with Not_found -> assert false) +| _ -> assert false + +let pp_preg_simple r = match r with +| Preg (r',_) +| PMreg (r',_) -> + (try List.assoc r' pvrs with Not_found -> assert false) +| _ -> assert false + +let pp_preg r = match r with +| Preg (r',s) -> + (try List.assoc r' pvrs with Not_found -> assert false) ^ + (try List.assoc s sve_arrange_specifier with Not_found -> assert false) +| PMreg (r',m) -> + (try List.assoc r' pvrs with Not_found -> assert false) ^ + (try List.assoc m sve_pred_modifier with Not_found -> assert false) +| _ -> assert false + let pp_reg r = match r with | Vreg _ -> pp_simd_vector_reg r | SIMDreg _ -> pp_simd_scalar_reg vvrs r +| Zreg _ -> pp_zreg r +| Preg _ | PMreg _ -> pp_preg r | _ -> pp_xreg r let pp_i n = match n with | 1 -> "instr:\"NOP\"" @@ -393,6 +502,9 @@ let pp_vreg v r = match v with let reg_compare r1 r2 = let strip_reg r = match r with | Vreg (v,_) -> SIMDreg v + | Zreg (v,_) -> SIMDreg v + | Preg (v,_) -> Preg (v,0) + | PMreg (v,_) -> Preg (v,0) | _ -> r in compare (strip_reg r1) (strip_reg r2) @@ -408,7 +520,6 @@ let type_reg r = | Vreg (_,(n_elt,sz)) -> Array (TestType.tr_nbits sz,n_elt) | _ -> Base "int" - (************) (* Barriers *) (************) @@ -765,6 +876,26 @@ let inverse_cond = function | LS -> HI | AL -> AL +(* Pattern specifier for scalable vector instructions *) +type pattern = + | POW2 + | VL1 + | VL2 + | VL3 + | VL4 + | VL5 + | VL6 + | VL7 + | VL8 + | VL16 + | VL32 + | VL64 + | VL128 + | VL256 + | MUL4 + | MUL3 + | ALL + type op = | ADD | ADDS | SUB | SUBS | AND | ANDS | ORR | ORN | EOR | EON | ASR | LSR | LSL | ROR | BICS | BIC @@ -847,6 +978,7 @@ module MemExt = struct (* Extensions for memory accesses *) type 'k ext = | Imm of 'k idx | Reg of variant * reg * rext * 'k + | ZReg of reg * rext * 'k let v2idx_reg v r = match v with @@ -1152,6 +1284,201 @@ type 'k kinstruction = | I_STLR of variant * reg * reg | I_STXR of variant * st_type * reg * reg * reg | I_STXP of variant * st_type * reg * reg * reg * reg +(* Scalable Vector Extension*) + (* PTRUE .{, } *) + | I_PTRUE of reg * pattern + (* WHILEL{LT,LE,LO,LS} ., , *) + | I_WHILELT of reg * variant * reg * reg + | I_WHILELE of reg * variant * reg * reg + | I_WHILELO of reg * variant * reg * reg + | I_WHILELS of reg * variant * reg * reg + (* UADDV
, , . *) + | I_UADDV of simd_variant * reg * reg * reg + (* + * LD1{B,H,W,D} (scalar plus scalar, single register) + * + * LD1B { . }, /Z, [, ] for T in D,S,H,B + * LD1H { . }, /Z, [, , LSL #1] for T in D,S,H + * LD1W { . }, /Z, [, , LSL #2] for T in D,S + * LD1D { .D }, /Z, [, , LSL #3] + * + * LD1{B,H,W,D} (scalar plus immediate, single register) + * + * LD1B { . }, /Z, [{, #, MUL VL}] for T in D,S,H,B + * LD1H { . }, /Z, [{, #, MUL VL}] for T in D,S,H + * LD1W { .S }, /Z, [{, #, MUL VL}] for T in D,S + * LD1D { .D }, /Z, [{, #, MUL VL}] + * + * LD1{B,H,W,D} (scalar plus vector) + * + * LD1B { .D }, /Z, [, .D, ] + * LD1B { .S }, /Z, [, .S, ] + * LD1B { .D }, /Z, [, .D] + * LD1H { .S }, /Z, [, .S, #1] + * LD1H { .D }, /Z, [, .D, #1] + * LD1H { .D }, /Z, [, .D, ] + * LD1H { .S }, /Z, [, .S, ] + * LD1H { .D }, /Z, [, .D, LSL #1] + * LD1H { .D }, /Z, [, .D] + * LD1W { .S }, /Z, [, .S, #2] + * LD1W { .D }, /Z, [, .D, #2] + * LD1W { .D }, /Z, [, .D, ] + * LD1W { .S }, /Z, [, .S, ] + * LD1W { .D }, /Z, [, .D, LSL #2] + * LD1W { .D }, /Z, [, .D] + * LD1D { .D }, /Z, [, .D, #3] + * LD1D { .D }, /Z, [, .D, ] + * LD1D { .D }, /Z, [, .D, LSL #3] + * LD1D { .D }, /Z, [, .D] + *) + | I_LD1SP of simd_variant * reg list * reg * reg * 'k MemExt.ext + (* + * LD2{B,H,W,D} (scalar plus scalar) + * + * LD2B { .B, .B }, /Z, [, ] + * LD2H { .H, .H }, /Z, [, , LSL #1] + * LD2W { .S, .S }, /Z, [, , LSL #2] + * LD2D { .D, .D }, /Z, [, , LSL #3] + * + * LD2{B,H,W,D} (scalar plus immediate) + * + * LD2B { .B, .B }, /Z, [{, #, MUL VL}] + * LD2H { .H, .H }, /Z, [{, #, MUL VL}] + * LD2W { .S, .S }, /Z, [{, #, MUL VL}] + * LD2D { .D, .D }, /Z, [{, #, MUL VL}] + *) + | I_LD2SP of simd_variant * reg list * reg * reg * 'k MemExt.ext + (* + * LD3{B,H,W,D} (scalar plus scalar) + * + * LD3B { .B, .B, .B }, /Z, [, ] + * LD3H { .H, .H, .H }, /Z, [, , LSL #1] + * LD3W { .S, .S, .S }, /Z, [, , LSL #2] + * LD3D { .D, .D, .D }, /Z, [, , LSL #3] + * + * LD3{B,H,W,D} (scalar plus immediate) + * + * LD3B { .B, .B, .B }, /Z, [{, #, MUL VL}] + * LD3H { .H, .H, .H }, /Z, [{, #, MUL VL}] + * LD3W { .S, .S, .S }, /Z, [{, #, MUL VL}] + * LD3D { .D, .D, .D }, /Z, [{, #, MUL VL}] + *) + | I_LD3SP of simd_variant * reg list * reg * reg * 'k MemExt.ext + (* + * LD4{B,H,W,D} (scalar plus scalar) + * + * LD4B { .B, .B, .B, .B }, /Z, [, ] + * LD4H { .H, .H, .H, .H }, /Z, [, , LSL #1] + * LD4W { .S, .S, .S, .S }, /Z, [, , LSL #2] + * LD4D { .D, .D, .D, .D }, /Z, [, , LSL #3] + * + * LD4{B,H,W,D} (scalar plus immediate) + * + * LD4B { .B, .B, .B, .B }, /Z, [{, #, MUL VL}] + * LD4H { .H, .H, .H, .H }, /Z, [{, #, MUL VL}] + * LD4W { .S, .S, .S, .S }, /Z, [{, #, MUL VL}] + *) + | I_LD4SP of simd_variant * reg list * reg * reg * 'k MemExt.ext + (* + * ST1{B,H,W,D} (scalar plus scalar) + * + * ST1B { . }, , [, ] for T in D,S,H,B + * ST1H { . }, , [, , LSL #1] for T in D,S,H + * ST1W { . }, , [, , LSL #2] for T in D,S + * ST1D { .D }, , [, , LSL #3] + * + * ST1{B,H,W,D} (scalar plus immediate) + * + * ST1B { . }, , [{, #, MUL VL}] for T in D,S,H,B + * ST1H { . }, , [{, #, MUL VL}] for T in D,S,H + * ST1W { . }, , [{, #, MUL VL}] for T in D,S + * ST1D { .D }, , [{, #, MUL VL}] + * + * ST1{B,H,W,D} (scalar plus vector) + * + * ST1B { .D }, , [, .D, ] + * ST1B { .S }, , [, .S, ] + * ST1B { .D }, , [, .D] + * ST1H { .S }, , [, .S, #1] + * ST1H { .D }, , [, .D, #1] + * ST1H { .D }, , [, .D, ] + * ST1H { .S }, , [, .S, ] + * ST1H { .D }, , [, .D, LSL #1] + * ST1H { .D }, , [, .D] + * ST1W { .S }, , [, .S, #2] + * ST1W { .D }, , [, .D, #2] + * ST1W { .D }, , [, .D, ] + * ST1W { .S }, , [, .S, ] + * ST1W { .D }, , [, .D, LSL #2] + * ST1W { .D }, , [, .D] + * ST1D { .D }, , [, .D, #3] + * ST1D { .D }, , [, .D, ] + * ST1D { .D }, , [, .D, LSL #3] + * ST1D { .D }, , [, .D] + *) + | I_ST1SP of simd_variant * reg list * reg * reg * 'k MemExt.ext + (* + * ST2{B,H,W,D} (scalar plus scalar) + * + * ST2B { .B, .B }, , [, ] + * ST2H { .H, .H }, , [, , LSL #1] + * ST2W { .S, .S }, , [, , LSL #2] + * ST2D { .D, .D }, , [, , LSL #3] + * + * ST2{B,H,W,D} (scalar plus immediate) + * + * ST2B { .B, .B }, , [{, #, MUL VL}] + * ST2H { .H, .H }, , [{, #, MUL VL}] + * ST2W { .S, .S }, , [{, #, MUL VL}] + * ST2D { .D, .D }, , [{, #, MUL VL}] + *) + | I_ST2SP of simd_variant * reg list * reg * reg * 'k MemExt.ext + (* + * ST3{B,H,W,D} (scalar plus scalar) + * + * ST3B { .B, .B, .B }, , [, ] + * ST3H { .H, .H, .H }, , [, , LSL #1] + * ST3W { .S, .S, .S }, , [, , LSL #2] + * ST3D { .D, .D, .D }, , [, , LSL #3] + * + * ST3{B,H,W,D} (scalar plus immediate) + * + * ST3B { .B, .B }, , [{, #, MUL VL}] + * ST3H { .H, .H }, , [{, #, MUL VL}] + * ST3W { .S, .S }, , [{, #, MUL VL}] + * ST3D { .D, .D }, , [{, #, MUL VL}] + *) + | I_ST3SP of simd_variant * reg list * reg * reg * 'k MemExt.ext + (* + * ST4{B,H,W,D} (scalar plus scalar) + * + * ST4B { .B, .B, .B, .B }, , [, ] + * ST4H { .H, .H, .H, .H }, , [, , LSL #1] + * ST4W { .S, .S, .S, .S }, , [, , LSL #2] + * ST4D { .D, .D, .D, .D }, , [, , LSL #3] + * + * ST4{B,H,W,D} (scalar plus immediate) + * + * ST4B { .B, .B }, , [{, #, MUL VL}] + * ST4H { .H, .H }, , [{, #, MUL VL}] + * ST4W { .S, .S }, , [{, #, MUL VL}] + * ST4D { .D, .D }, , [{, #, MUL VL}] + *) + | I_ST4SP of simd_variant * reg list * reg * reg * 'k MemExt.ext + (* MOV ., #{, } *) + | I_MOV_SV of reg * 'k * 'k s + (* DUP ., *) + | I_DUP_SV of reg * variant * reg + (* ADD ., ., . *) + | I_ADD_SV of reg * reg * reg + (* INDEX ., , # *) + | I_INDEX_SI of reg * variant * reg * 'k + (* INDEX ., #, *) + | I_INDEX_IS of reg * variant * 'k * reg + (* INDEX ., , *) + | I_INDEX_SS of reg * variant * reg * reg + (* INDEX ., #, # *) + | I_INDEX_II of reg * 'k * 'k (* Morello *) | I_ALIGND of reg * reg * 'k | I_ALIGNU of reg * reg * 'k @@ -1248,6 +1575,25 @@ let pp_cond = function | LE -> "LE" | AL -> "AL" +let pp_pattern = function + | POW2 -> "POW2" + | VL1 -> "VL1" + | VL2 -> "VL2" + | VL3 -> "VL3" + | VL4 -> "VL4" + | VL5 -> "VL5" + | VL6 -> "VL6" + | VL7 -> "VL7" + | VL8 -> "VL8" + | VL16 -> "VL16" + | VL32 -> "VL32" + | VL64 -> "VL64" + | VL128 -> "VL128" + | VL256 -> "VL256" + | MUL4 -> "MUL4" + | MUL3 -> "MUL3" + | ALL -> "ALL" + let pp_vsimdreg v r = match v with | VSIMD8 -> pp_simd_scalar_reg bvrs r | VSIMD16 -> pp_simd_scalar_reg hvrs r @@ -1255,6 +1601,12 @@ let pp_vsimdreg v r = match v with | VSIMD64 -> pp_simd_scalar_reg dvrs r | VSIMD128 -> pp_simd_scalar_reg qvrs r +let pp_simd_variant v = match v with +| VSIMD8 -> "B" +| VSIMD16 -> "H" +| VSIMD32 -> "W" +| VSIMD64 -> "D" +| VSIMD128 -> "Q" let pp_op = function | ADD -> "ADD" @@ -1375,7 +1727,13 @@ let do_pp_instruction m = | Reg (v,r,(UXTW|SXTW|SXTX as ext),k) when m.zerop k-> sprintf "[%s,%s,%s]" (pp_xreg ra) (pp_vreg v r) (pp_sext ext) | Reg (v,r,ext,k) -> - sprintf "[%s,%s,%s %s]" (pp_xreg ra) (pp_vreg v r) (pp_sext ext) (m.pp_k k) in + sprintf "[%s,%s,%s %s]" (pp_xreg ra) (pp_vreg v r) (pp_sext ext) (m.pp_k k) + | ZReg (r,LSL,k) when m.zerop k -> + sprintf "[%s,%s]" (pp_xreg ra) (pp_zreg r) + | ZReg (r,(UXTW|SXTW|SXTX as ext),k) when m.zerop k-> + sprintf "[%s,%s,%s]" (pp_xreg ra) (pp_zreg r) (pp_sext ext) + | ZReg (r,ext,k) -> + sprintf "[%s,%s,%s %s]" (pp_xreg ra) (pp_zreg r) (pp_sext ext) (m.pp_k k) in let pp_mem_ext memo v rt ra idx = Printf.sprintf "%s %s,%s" memo (pp_vreg v rt) (pp_addr ra idx) in @@ -1601,7 +1959,49 @@ let do_pp_instruction m = pp_simd_rrr "ADD" r1 r2 r3 | I_ADD_SIMD_S (r1,r2,r3) -> pp_simd_srrr "ADD" r1 r2 r3 - +(* Scalable Vector Extention *) + | I_PTRUE (predicate,pattern) -> + sprintf "PTRUE %s,%s" (pp_preg predicate) (pp_pattern pattern) + | I_WHILELT (p1,v,r2,r3) -> + sprintf "WHILELT %s,%s,%s" (pp_preg p1) (pp_vreg v r2) (pp_vreg v r3) + | I_WHILELE (p1,v,r2,r3) -> + sprintf "WHILELE %s,%s,%s" (pp_preg p1) (pp_vreg v r2) (pp_vreg v r3) + | I_WHILELO (p1,v,r2,r3) -> + sprintf "WHILELO %s,%s,%s" (pp_preg p1) (pp_vreg v r2) (pp_vreg v r3) + | I_WHILELS (p1,v,r2,r3) -> + sprintf "WHILELS %s,%s,%s" (pp_preg p1) (pp_vreg v r2) (pp_vreg v r3) + | I_UADDV (v,r1,r2,r3) -> + sprintf "UADDV %s,%s,%s" (pp_vsimdreg v r1) (pp_preg_simple r2) (pp_zreg r3) + | I_LD1SP (v,rs,r2,r3,idx) -> + sprintf "LD1%s {%s},%s,%s" (pp_simd_variant v) (String.concat "," (List.map pp_zreg rs)) (pp_preg r2) (pp_addr r3 idx) + | I_LD2SP (v,rs,r2,r3,idx) -> + sprintf "LD2%s {%s},%s,%s" (pp_simd_variant v) (String.concat "," (List.map pp_zreg rs)) (pp_preg r2) (pp_addr r3 idx) + | I_LD3SP (v,rs,r2,r3,idx) -> + sprintf "LD3%s {%s},%s,%s" (pp_simd_variant v) (String.concat "," (List.map pp_zreg rs)) (pp_preg r2) (pp_addr r3 idx) + | I_LD4SP (v,rs,r2,r3,idx) -> + sprintf "LD4%s {%s},%s,%s" (pp_simd_variant v) (String.concat "," (List.map pp_zreg rs)) (pp_preg r2) (pp_addr r3 idx) + | I_ST1SP (v,rs,r2,r3,idx) -> + sprintf "ST1%s {%s},%s,%s" (pp_simd_variant v) (String.concat "," (List.map pp_zreg rs)) (pp_preg_simple r2) (pp_addr r3 idx) + | I_ST2SP (v,rs,r2,r3,idx) -> + sprintf "ST2%s {%s},%s,%s" (pp_simd_variant v) (String.concat "," (List.map pp_zreg rs)) (pp_preg_simple r2) (pp_addr r3 idx) + | I_ST3SP (v,rs,r2,r3,idx) -> + sprintf "ST3%s {%s},%s,%s" (pp_simd_variant v) (String.concat "," (List.map pp_zreg rs)) (pp_preg_simple r2) (pp_addr r3 idx) + | I_ST4SP (v,rs,r2,r3,idx) -> + sprintf "ST4%s {%s},%s,%s" (pp_simd_variant v) (String.concat "," (List.map pp_zreg rs)) (pp_preg_simple r2) (pp_addr r3 idx) + | I_MOV_SV (r,k,s) -> + sprintf "MOV %s,%s%s" (pp_zreg r) (m.pp_k k) (pp_barrel_shift "," s (m.pp_k)) + | I_DUP_SV (r1,v,r2) -> + sprintf "DUP %s,%s" (pp_zreg r1) (pp_vreg v r2) + | I_ADD_SV (r1,r2,r3) -> + sprintf "ADD %s,%s,%s" (pp_zreg r1) (pp_zreg r2) (pp_zreg r3) + | I_INDEX_SI (r1,v,r2,k) -> + sprintf "INDEX %s,%s,%s" (pp_zreg r1) (pp_vreg v r2) (m.pp_k k) + | I_INDEX_IS (r1,v,k,r2) -> + sprintf "INDEX %s,%s,%s" (pp_zreg r1) (m.pp_k k) (pp_vreg v r2) + | I_INDEX_SS (r1,v,r2,r3) -> + sprintf "INDEX %s,%s,%s" (pp_zreg r1) (pp_vreg v r2) (pp_vreg v r3) + | I_INDEX_II (r1,k1,k2) -> + sprintf "INDEX %s,%s,%s" (pp_zreg r1) (m.pp_k k1) (m.pp_k k2) (* Morello *) | I_ALIGND (r1,r2,k) -> sprintf "ALIGND %s,%s,%s" (pp_creg r1) (pp_creg r2) (m.pp_k k) @@ -1828,6 +2228,9 @@ let fold_regs (f_regs,f_sregs) = | Ireg _ -> f_regs reg y_reg,y_sreg | Vreg _ -> f_regs reg y_reg,y_sreg | SIMDreg _ -> f_regs reg y_reg,y_sreg + | Zreg _ -> f_regs reg y_reg,y_sreg + | Preg _ -> f_regs reg y_reg,y_sreg + | PMreg _ -> f_regs reg y_reg,y_sreg | Symbolic_reg reg -> y_reg,f_sregs reg y_sreg | Internal _ | NZCV | ZR | SP | PC | ResAddr | Tag _ | SysReg _ @@ -1841,7 +2244,8 @@ let fold_regs (f_regs,f_sregs) = let open MemExt in match idx with | Imm _ -> c - | Reg (_,r,_,_) -> fold_reg r c in + | Reg (_,r,_,_) -> fold_reg r c + | ZReg (r,_,_) -> fold_reg r c in let fold_op_ext e c = let open OpExt in @@ -1859,6 +2263,9 @@ let fold_regs (f_regs,f_sregs) = | I_CHKSLD r | I_CHKTGD r | I_MOVI_V (r,_,_) | I_MOVI_S (_,r,_) | I_TLBI (_,r) + | I_MOV_SV (r,_,_) + | I_INDEX_II (r,_,_) + | I_PTRUE (r,_) -> fold_reg r c | I_MOV (_,r1,kr) -> fold_reg r1 (fold_kr kr c) @@ -1873,8 +2280,8 @@ let fold_regs (f_regs,f_sregs) = | I_MOV_VE (r1,_,r2,_) | I_MOV_V (r1,r2) | I_MOV_TG (_,r1,r2,_) | I_MOV_FG (r1,_,_,r2) | I_MOV_S (_,r1,r2,_) | I_FMOV_TG (_,r1,_,r2) - | I_DUP (r1,_,r2) - | I_ADDV (_,r1,r2) + | I_DUP (r1,_,r2) | I_DUP_SV (r1,_,r2) + | I_ADDV (_,r1,r2) | I_INDEX_SI (r1,_,r2,_) | I_INDEX_IS (r1,_,_,r2) | I_LDUR_SIMD (_,r1,r2,_) | I_LDAPUR_SIMD (_,r1,r2,_) | I_STUR_SIMD (_,r1,r2,_) | I_STLUR_SIMD (_,r1,r2,_) | I_LDG (r1,r2,_) | I_STZG (r1,r2,_) @@ -1899,6 +2306,15 @@ let fold_regs (f_regs,f_sregs) = | I_ST3 (rs,_,r2,kr) | I_ST3M (rs,r2,kr) | I_ST4 (rs,_,r2,kr) | I_ST4M (rs,r2,kr) -> List.fold_right fold_reg rs (fold_reg r2 (fold_kr kr c)) + | I_LD1SP (_,rs,r2,r3,idx) + | I_LD2SP (_,rs,r2,r3,idx) + | I_LD3SP (_,rs,r2,r3,idx) + | I_LD4SP (_,rs,r2,r3,idx) + | I_ST1SP (_,rs,r2,r3,idx) + | I_ST2SP (_,rs,r2,r3,idx) + | I_ST3SP (_,rs,r2,r3,idx) + | I_ST4SP (_,rs,r2,r3,idx) + -> List.fold_right fold_reg rs (fold_reg r2 ( fold_reg r3 (fold_idx idx c))) | I_CSEL (_,r1,r2,r3,_,_) | I_STXR (_,_,r1,r2,r3) | I_STXRBH (_,_,r1,r2,r3) | I_BUILD (r1,r2,r3) | I_CPYTYPE (r1,r2,r3) | I_CPYVALUE (r1,r2,r3) @@ -1908,6 +2324,9 @@ let fold_regs (f_regs,f_sregs) = | I_STP_SIMD (_,_,r1,r2,r3,_) | I_EOR_SIMD (r1,r2,r3) | I_ADD_SIMD (r1,r2,r3) | I_ADD_SIMD_S (r1,r2,r3) | I_LDXP (_,_,r1,r2,r3) + | I_WHILELT (r1,_,r2,r3) | I_WHILELE (r1,_,r2,r3) | I_WHILELO (r1,_,r2,r3) | I_WHILELS (r1,_,r2,r3) + | I_INDEX_SS(r1,_,r2,r3) + | I_UADDV (_,r1,r2,r3) | I_ADD_SV (r1,r2,r3) -> fold_reg r1 (fold_reg r2 (fold_reg r3 c)) | I_LDP (_,_,r1,r2,r3,_) | I_LDPSW (r1,r2,r3,_) @@ -1934,6 +2353,9 @@ let map_regs f_reg f_symb = | Ireg _ -> f_reg reg | Vreg _ -> f_reg reg | SIMDreg _ -> f_reg reg + | Zreg _ -> f_reg reg + | Preg _ -> f_reg reg + | PMreg _ -> f_reg reg | Symbolic_reg reg -> f_symb reg | Internal _ | ZR | SP| PC | NZCV | ResAddr | Tag _ | SysReg _ @@ -1947,7 +2369,8 @@ let map_regs f_reg f_symb = let open MemExt in match idx with | Imm _ -> idx - | Reg (v,r,ext,k) -> Reg (v,map_reg r,ext,k) in + | Reg (v,r,ext,k) -> Reg (v,map_reg r,ext,k) + | ZReg (r,ext,k) -> ZReg (map_reg r,ext,k) in let map_op_ext e = let open OpExt in @@ -2103,7 +2526,50 @@ let map_regs f_reg f_symb = | I_ADD_SIMD (r1,r2,r3) -> I_ADD_SIMD (map_reg r1,map_reg r2,map_reg r3) | I_ADD_SIMD_S (r1,r2,r3) -> - I_ADD_SIMD_S (map_reg r1,map_reg r2,map_reg r3) + I_ADD_SIMD_S (map_reg r1,map_reg r2,map_reg r3) +(* Scalable Vector Extension *) + | I_PTRUE (predicate,pattern) -> + I_PTRUE (map_reg predicate,pattern) + | I_WHILELT (r1,v,r2,r3) -> + I_WHILELT (map_reg r1,v,map_reg r2,map_reg r3) + | I_WHILELE (r1,v,r2,r3) -> + I_WHILELE (map_reg r1,v,map_reg r2,map_reg r3) + | I_WHILELO (r1,v,r2,r3) -> + I_WHILELO (map_reg r1,v,map_reg r2,map_reg r3) + | I_WHILELS (r1,v,r2,r3) -> + I_WHILELS (map_reg r1,v,map_reg r2,map_reg r3) + | I_UADDV (v,r1,r2,r3) -> + I_UADDV (v,map_reg r1,map_reg r2,map_reg r3) + | I_LD1SP (v,rs,r2,r3,idx) -> + I_LD1SP (v,List.map map_reg rs,map_reg r2,map_reg r3,map_idx idx) + | I_LD2SP (v,rs,r2,r3,idx) -> + I_LD2SP (v,List.map map_reg rs,map_reg r2,map_reg r3,map_idx idx) + | I_LD3SP (v,rs,r2,r3,idx) -> + I_LD3SP (v,List.map map_reg rs,map_reg r2,map_reg r3,map_idx idx) + | I_LD4SP (v,rs,r2,r3,idx) -> + I_LD4SP (v,List.map map_reg rs,map_reg r2,map_reg r3,map_idx idx) + | I_ST1SP (v,rs,r2,r3,idx) -> + I_ST1SP (v,List.map map_reg rs,map_reg r2,map_reg r3,map_idx idx) + | I_ST2SP (v,rs,r2,r3,idx) -> + I_ST2SP (v,List.map map_reg rs,map_reg r2,map_reg r3,map_idx idx) + | I_ST3SP (v,rs,r2,r3,idx) -> + I_ST3SP (v,List.map map_reg rs,map_reg r2,map_reg r3,map_idx idx) + | I_ST4SP (v,rs,r2,r3,idx) -> + I_ST4SP (v,List.map map_reg rs,map_reg r2,map_reg r3,map_idx idx) + | I_MOV_SV (r,k,os) -> + I_MOV_SV (map_reg r,k,os) + | I_DUP_SV (r1,v,r2) -> + I_DUP_SV (map_reg r1,v,map_reg r2) + | I_ADD_SV (r1,r2,r3) -> + I_ADD_SV (map_reg r1,map_reg r2,map_reg r3) + | I_INDEX_SI (r1,v,r2,k) -> + I_INDEX_SI (map_reg r1,v,map_reg r2,k) + | I_INDEX_IS (r1,v,k,r2) -> + I_INDEX_IS (map_reg r1,v,k,map_reg r2) + | I_INDEX_SS (r1,v,r2,r3) -> + I_INDEX_SS (map_reg r1,v,map_reg r2,map_reg r3) + | I_INDEX_II (r1,k1,k2) -> + I_INDEX_II (map_reg r1,k1,k2) (* Morello *) | I_ALIGNU (r1,r2,k) -> I_ALIGNU(map_reg r1,map_reg r2,k) @@ -2317,6 +2783,12 @@ let get_next = | I_MOVI_V _ | I_MOVI_S _ | I_EOR_SIMD _ | I_ADD_SIMD _ | I_ADD_SIMD_S _ | I_LDXP _|I_STXP _|I_UDF _ + | I_WHILELT _ | I_WHILELE _ | I_WHILELO _ | I_WHILELS _ + | I_UADDV _ | I_DUP_SV _ | I_PTRUE _ + | I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _ + | I_LD1SP _ | I_LD2SP _ | I_LD3SP _ | I_LD4SP _ + | I_ST1SP _ | I_ST2SP _ | I_ST3SP _ | I_ST4SP _ + | I_MOV_SV _ | I_ADD_SV _ -> [Label.Next;] (* Check instruction validity, beyond parsing *) @@ -2490,6 +2962,7 @@ module PseudoI = struct match idx with | Imm k -> Imm (idx_tr k) | Reg (v,r,ext,k) -> Reg (v,r,ext,k_tr k) + | ZReg (r,ext,k) -> ZReg (r,ext,k_tr k) let op_ext_tr e = let open OpExt in @@ -2549,6 +3022,9 @@ module PseudoI = struct | I_MOV_S _ | I_LDXP _| I_STXP _ | I_MOPL _ + | I_WHILELT _ | I_WHILELE _ | I_WHILELO _| I_WHILELS _ + | I_UADDV _ | I_DUP_SV _ | I_PTRUE _ + | I_ADD_SV _ | I_INDEX_SS _ as keep -> keep | I_LDR (v,r1,r2,idx) -> I_LDR (v,r1,r2,ext_tr idx) | I_LDRSW (r1,r2,idx) -> I_LDRSW (r1,r2,ext_tr idx) @@ -2617,6 +3093,18 @@ module PseudoI = struct | I_ADD_SIMD (r1,r2,r3) -> I_ADD_SIMD (r1,r2,r3) | I_ADD_SIMD_S (r1,r2,r3) -> I_ADD_SIMD_S (r1,r2,r3) | I_UDF k -> I_UDF (k_tr k) + | I_MOV_SV (r,k,s) -> I_MOV_SV (r,k_tr k,ap_shift k_tr s) + | I_LD1SP (v,r1,r2,r3,idx) -> I_LD1SP (v,r1,r2,r3,ext_tr idx) + | I_LD2SP (v,r1,r2,r3,idx) -> I_LD2SP (v,r1,r2,r3,ext_tr idx) + | I_LD3SP (v,r1,r2,r3,idx) -> I_LD3SP (v,r1,r2,r3,ext_tr idx) + | I_LD4SP (v,r1,r2,r3,idx) -> I_LD4SP (v,r1,r2,r3,ext_tr idx) + | I_ST1SP (v,r1,r2,r3,idx) -> I_ST1SP (v,r1,r2,r3,ext_tr idx) + | I_ST2SP (v,r1,r2,r3,idx) -> I_ST2SP (v,r1,r2,r3,ext_tr idx) + | I_ST3SP (v,r1,r2,r3,idx) -> I_ST3SP (v,r1,r2,r3,ext_tr idx) + | I_ST4SP (v,r1,r2,r3,idx) -> I_ST4SP (v,r1,r2,r3,ext_tr idx) + | I_INDEX_SI (r1,v,r2,k) -> I_INDEX_SI (r1,v,r2,k_tr k) + | I_INDEX_IS (r1,v,k,r2) -> I_INDEX_IS (r1,v,k_tr k,r2) + | I_INDEX_II (r1,k1,k2) -> I_INDEX_II (r1,k_tr k1,k_tr k2) let get_simd_rpt_selem ins rs = match ins with | I_LD1M _ @@ -2705,6 +3193,11 @@ module PseudoI = struct | I_MOVI_V _ | I_MOVI_S _ | I_EOR_SIMD _ | I_ADD_SIMD _ | I_ADD_SIMD_S _ | I_UDF _ + | I_WHILELT _ | I_WHILELE _ | I_WHILELO _ | I_WHILELS _ + | I_PTRUE _ + | I_ADD_SV _ | I_UADDV _ | I_DUP_SV _ + | I_INDEX_SI _ | I_INDEX_IS _ | I_INDEX_SS _ | I_INDEX_II _ + | I_MOV_SV _ -> 0 | I_LD1M (rs, _, _) | I_LD2M (rs, _, _) @@ -2717,6 +3210,9 @@ module PseudoI = struct -> let (rpt, selem) = (get_simd_rpt_selem ins rs) in let n = get_simd_elements rs in rpt * selem * n + | I_LD1SP (_,rs,_,_,_) | I_LD2SP (_,rs,_,_,_) | I_LD3SP (_,rs,_,_,_) | I_LD4SP (_,rs,_,_,_) + | I_ST1SP (_,rs,_,_,_) | I_ST2SP (_,rs,_,_,_) | I_ST3SP (_,rs,_,_,_) | I_ST4SP (_,rs,_,_,_) + -> 16 * List.length rs (* upper bound *) let size_of_ins _ = 4 diff --git a/lib/AArch64Lexer.mll b/lib/AArch64Lexer.mll index 45945669c3..b64544f447 100644 --- a/lib/AArch64Lexer.mll +++ b/lib/AArch64Lexer.mll @@ -60,20 +60,20 @@ match name with | "hi" | "HI" -> TOK_HI | "ls" | "LS" -> TOK_LS | "al" | "AL" -> TOK_AL -| "b.eq" | "B.EQ" -> BEQ -| "b.ne" | "B.NE" -> BNE -| "b.ge" | "B.GE" -> BGE +| "b.eq" | "B.EQ" | "b.none" | "B.NONE" -> BEQ +| "b.ne" | "B.NE" | "b.any" | "B.ANY" -> BNE +| "b.ge" | "B.GE" | "b.tcont" | "B.TCONT" -> BGE | "b.gt" | "B.GT" -> BGT | "b.le" | "B.LE" -> BLE -| "b.lt" | "B.LT" -> BLT -| "b.cs" | "B.CS" -> BCS -| "b.cc" | "B.CC" -> BCC -| "b.mi" | "B.MI" -> BMI -| "b.pl" | "B.PL" -> BPL +| "b.lt" | "B.LT" | "b.tstop" | "B.TSTOP" -> BLT +| "b.cs" | "B.CS" | "b.nlast" | "B.NLAST" -> BCS +| "b.cc" | "B.CC" | "b.last" | "B.LAST" -> BCC +| "b.mi" | "B.MI" | "b.first" | "B.FIRST" -> BMI +| "b.pl" | "B.PL" | "b.nfirst" | "B.NFIRST" -> BPL | "b.vs" | "B.VS" -> BVS | "b.vc" | "B.VC" -> BVC | "b.hi" | "B.HI" -> BHI -| "b.ls" | "B.LS" -> BLS +| "b.ls" | "B.LS" | "b.plast" | "B.PLAST" -> BLS | "b.al" | "B.AL" -> BAL | "cbz" | "CBZ" -> CBZ | "cbnz" | "CBNZ" -> CBNZ @@ -145,6 +145,65 @@ match name with | "movi" | "MOVI" -> MOVI | "mvn" | "MVN" -> MVN | "fmov" | "FMOV" -> FMOV +(* Scalabel Vector Extension *) +| "whilelt" | "WHILELT" -> WHILELT +| "whilele" | "WHILELE" -> WHILELE +| "whilelo" | "WHILELO" -> WHILELO +| "whilels" | "WHILELS" -> WHILELS +| "uaddv" | "UADDV" -> UADDV +| "ld1b" | "LD1B" -> LD1B +| "ld1h" | "LD1H" -> LD1H +| "ld1w" | "LD1W" -> LD1W +| "ld1d" | "LD1D" -> LD1D +| "ld2b" | "LD2B" -> LD2B +| "ld2h" | "LD2H" -> LD2H +| "ld2w" | "LD2W" -> LD2W +| "ld2d" | "LD2D" -> LD2D +| "ld3b" | "LD3B" -> LD3B +| "ld3h" | "LD3H" -> LD3H +| "ld3w" | "LD3W" -> LD3W +| "ld3d" | "LD3D" -> LD3D +| "ld4b" | "LD4B" -> LD4B +| "ld4h" | "LD4H" -> LD4H +| "ld4w" | "LD4W" -> LD4W +| "ld4d" | "LD4D" -> LD4D +| "st1b" | "ST1B" -> ST1B +| "st1h" | "ST1H" -> ST1H +| "st1w" | "ST1W" -> ST1W +| "st1d" | "ST1D" -> ST1D +| "st2b" | "ST2B" -> ST2B +| "st2h" | "ST2H" -> ST2H +| "st2w" | "ST2W" -> ST2W +| "st2d" | "ST2D" -> ST2D +| "st3b" | "ST3B" -> ST3B +| "st3h" | "ST3H" -> ST3H +| "st3w" | "ST3W" -> ST3W +| "st3d" | "ST3D" -> ST3D +| "st4b" | "ST4B" -> ST4B +| "st4h" | "ST4H" -> ST4H +| "st4w" | "ST4W" -> ST4W +| "st4d" | "ST4D" -> ST4D +| "index" | "INDEX" -> TOK_INDEX +| "mul" | "MUL" -> TOK_MUL +| "vl" | "VL" -> TOK_VL +| "ptrue" | "PTRUE" -> PTRUE +| "pow2" | "POW2" -> TOK_POW2 +| "vl1" | "VL1" -> TOK_VL1 +| "vl2" | "VL2" -> TOK_VL2 +| "vl3" | "VL3" -> TOK_VL3 +| "vl4" | "VL4" -> TOK_VL4 +| "vl5" | "VL5" -> TOK_VL5 +| "vl6" | "VL6" -> TOK_VL6 +| "vl7" | "VL7" -> TOK_VL7 +| "vl8" | "VL8" -> TOK_VL8 +| "vl16" | "VL16" -> TOK_VL16 +| "vl32" | "VL32" -> TOK_VL32 +| "vl64" | "VL64" -> TOK_VL64 +| "vl128" | "VL128" -> TOK_VL128 +| "vl256" | "VL256" -> TOK_VL256 +| "mul4" | "MUL4" -> TOK_MUL4 +| "mul3" | "MUL3" -> TOK_MUL3 +| "all" | "ALL" -> TOK_ALL (* Compare and swap *) | "cas"|"CAS" -> CAS | "casa"|"CASA" -> CASA @@ -577,9 +636,26 @@ match name with | _ -> assert false end | None -> - begin match A.parse_sysreg name with - | Some r -> SYSREG r - | None -> NAME name + begin match A.parse_zreg name with + | Some r -> ARCH_ZREG r + | None -> + begin match A.parse_pmreg name with + | Some r -> + begin match r with + | A.PMreg (_,A.Zero) -> ARCH_PMREG_Z r + | A.PMreg (_,A.Merge) -> ARCH_PMREG_M r + | _ -> assert false + end + | None -> + begin match A.parse_preg name with + | Some r -> ARCH_PREG r + | None -> + begin match A.parse_sysreg name with + | Some r -> SYSREG r + | None -> NAME name + end + end + end end end end diff --git a/lib/AArch64Parser.mly b/lib/AArch64Parser.mly index 6bc1807597..46f07c6a4f 100644 --- a/lib/AArch64Parser.mly +++ b/lib/AArch64Parser.mly @@ -17,6 +17,14 @@ open AArch64Base +let preg_of_proc p = + match p with + | num -> ( + match parse_preg ("P" ^ string_of_int(num)) with + | Some r -> r + | _ -> raise Parsing.Parse_error + ) + (* No constant third argument for those *) let check_op3 op e = match op,e with @@ -38,6 +46,10 @@ let check_op3 op e = %token ARCH_SREG %token ARCH_DREG %token ARCH_QREG +%token ARCH_ZREG +%token ARCH_PREG +%token ARCH_PMREG_Z +%token ARCH_PMREG_M %token INDEX %token NUM %token NAME @@ -95,6 +107,13 @@ let check_op3 op e = %token LDOPBH %token STOPBH %token UDF +/* Scalable Vector Extention */ +%token WHILELT WHILELE WHILELO WHILELS UADDV TOK_INDEX TOK_MUL TOK_VL PTRUE +%token TOK_ALL TOK_POW2 TOK_MUL3 TOK_MUL4 +%token TOK_VL1 TOK_VL2 TOK_VL3 TOK_VL4 TOK_VL5 TOK_VL6 TOK_VL7 TOK_VL8 +%token TOK_VL16 TOK_VL32 TOK_VL64 TOK_VL128 TOK_VL256 +%token LD1B LD1H LD1W LD1D LD2B LD2H LD2W LD2D LD3B LD3H LD3W LD3D LD4B LD4H LD4W LD4D +%token ST1B ST1H ST1W ST1D ST2B ST2H ST2W ST2D ST3B ST3H ST3W ST3D ST4B ST4H ST4W ST4D /* /* %token LDUMAX LDUMAXA LDUMAXL LDUMAXAL LDUMAXH LDUMAXAH LDUMAXLH LDUMAXALH @@ -243,6 +262,58 @@ bhsdregs: | sreg { VSIMD32,$1 } | dreg { VSIMD64,$1 } +preg: +| PROC { preg_of_proc $1 } + +preg_sz: +| ARCH_PREG { $1 } + +pmreg_z: +| ARCH_PMREG_Z { $1 } + +pmreg_m: +| ARCH_PMREG_M { $1 } + +pmreg: +| ARCH_PMREG_Z { $1 } +| ARCH_PMREG_M { $1 } + + +zreg: +| ARCH_ZREG { $1 } + +zregs1: +| LCRL zreg RCRL { [$2] } + +zregs2: +| LCRL zreg COMMA zreg RCRL { [$2;$4] } + +zregs3: +| LCRL zreg COMMA zreg COMMA zreg RCRL { [$2;$4;$6] } + +zregs4: +| LCRL zreg COMMA zreg COMMA zreg COMMA zreg RCRL { [$2;$4;$6;$8] } + +pattern: +| { ALL } +| COMMA TOK_ALL { ALL } +| COMMA TOK_POW2 { POW2 } +| COMMA TOK_VL1 { VL1 } +| COMMA TOK_VL2 { VL2 } +| COMMA TOK_VL3 { VL3 } +| COMMA TOK_VL4 { VL4 } +| COMMA TOK_VL5 { VL5 } +| COMMA TOK_VL6 { VL6 } +| COMMA TOK_VL7 { VL7 } +| COMMA TOK_VL8 { VL8 } +| COMMA TOK_VL16 { VL16 } +| COMMA TOK_VL32 { VL32 } +| COMMA TOK_VL64 { VL64 } +| COMMA TOK_VL128 { VL128 } +| COMMA TOK_VL256 { VL256 } +| COMMA TOK_MUL3 { MUL3 } +| COMMA TOK_MUL4 { MUL4 } + k: | NUM { MetaConst.Int $1 } | META { MetaConst.Meta $1 } @@ -339,34 +410,48 @@ mem_idx: { $2,(MetaConst.zero,Idx) } | LBRK cxreg COMMA k RBRK { $2,($4,Idx) } +| LBRK cxreg COMMA k COMMA TOK_MUL TOK_VL RBRK + { $2,($4,Idx) } | LBRK cxreg RBRK COMMA k { $2,($5,PostIdx) } | LBRK cxreg COMMA k RBRK BANG { $2,($4,PreIdx) } +mreg: +| xreg { V64,$1 } +| wreg { V32,$1 } +(* (ab)use V128 to indicate Z regester *) +| zreg { V128,$1 } + mem_ea: | mem_idx { let r,idx = $1 in r,MemExt.Imm idx } -| LBRK cxreg COMMA wxreg RBRK +| LBRK cxreg COMMA mreg RBRK { let open MemExt in let v,r = $4 in - $2,Reg (v,r,LSL,MetaConst.zero) + match v with + | V32|V64 -> $2,Reg (v,r,LSL,MetaConst.zero) + | V128 -> $2,ZReg (r,LSL,MetaConst.zero) } -| LBRK cxreg COMMA wxreg COMMA mem_sext RBRK +| LBRK cxreg COMMA mreg COMMA mem_sext RBRK { let open MemExt in let v,r = $4 in - $2,Reg (v,r,$6,MetaConst.zero) + match v with + | V32|V64 -> $2,Reg (v,r,$6,MetaConst.zero) + | V128 -> $2,ZReg (r,$6,MetaConst.zero) } -| LBRK cxreg COMMA wxreg COMMA mem_sext k RBRK +| LBRK cxreg COMMA mreg COMMA mem_sext k RBRK { let open MemExt in let v,r = $4 in - $2,Reg (v,r,$6,$7) + match v with + | V32|V64 -> $2,Reg (v,r,$6,$7) + | V128 -> $2,ZReg (r,$6,$7) } shift: @@ -723,6 +808,397 @@ instr: { I_ADD_SIMD ($2,$4,$6) } | TOK_ADD dreg COMMA dreg COMMA dreg { I_ADD_SIMD_S ($2,$4,$6)} + /* Scalabel Vector extension */ +| WHILELT preg_sz COMMA xreg COMMA xreg + { I_WHILELT ($2,V64,$4,$6)} +| WHILELT preg_sz COMMA wreg COMMA wreg + { I_WHILELT ($2,V32,$4,$6)} +| WHILELE preg_sz COMMA xreg COMMA xreg + { I_WHILELE ($2,V64,$4,$6)} +| WHILELE preg_sz COMMA wreg COMMA wreg + { I_WHILELE ($2,V32,$4,$6)} +| WHILELO preg_sz COMMA xreg COMMA xreg + { I_WHILELO ($2,V64,$4,$6)} +| WHILELO preg_sz COMMA wreg COMMA wreg + { I_WHILELO ($2,V32,$4,$6)} +| WHILELS preg_sz COMMA xreg COMMA xreg + { I_WHILELS ($2,V64,$4,$6)} +| WHILELS preg_sz COMMA wreg COMMA wreg + { I_WHILELS ($2,V32,$4,$6)} +| UADDV dreg COMMA preg COMMA zreg + { I_UADDV (VSIMD64,$2,$4,$6)} +| LD1B zregs1 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 0) + | ZReg (_,LSL,MetaConst.Int 0) + | ZReg (_,UXTW,MetaConst.Int 0) + | ZReg (_,SXTW,MetaConst.Int 0) + -> I_LD1SP (VSIMD8,$2,$4,ra,ext) + | _ -> assert false + } +| LD1H zregs1 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 1) + | ZReg (_,LSL,MetaConst.Int 0) + | ZReg (_,UXTW,MetaConst.Int 0) + | ZReg (_,SXTW,MetaConst.Int 0) + | ZReg (_,LSL,MetaConst.Int 1) + | ZReg (_,UXTW,MetaConst.Int 1) + | ZReg (_,SXTW,MetaConst.Int 1) + -> I_LD1SP (VSIMD16,$2,$4,ra,ext) + | _ -> assert false + } +| LD1W zregs1 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 2) + | ZReg (_,LSL,MetaConst.Int 0) + | ZReg (_,UXTW,MetaConst.Int 0) + | ZReg (_,SXTW,MetaConst.Int 0) + | ZReg (_,LSL,MetaConst.Int 2) + | ZReg (_,UXTW,MetaConst.Int 2) + | ZReg (_,SXTW,MetaConst.Int 2) + -> I_LD1SP (VSIMD32,$2,$4,ra,ext) + | _ -> assert false + } +| LD1D zregs1 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 3) + | ZReg (_,LSL,MetaConst.Int 0) + | ZReg (_,UXTW,MetaConst.Int 0) + | ZReg (_,SXTW,MetaConst.Int 0) + | ZReg (_,LSL,MetaConst.Int 3) + | ZReg (_,UXTW,MetaConst.Int 3) + | ZReg (_,SXTW,MetaConst.Int 3) + -> I_LD1SP (VSIMD64,$2,$4,ra,ext) + | _ -> assert false + } +| LD2B zregs2 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 0) + -> I_LD2SP (VSIMD8,$2,$4,ra,ext) + | _ -> assert false + } +| LD2H zregs2 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 1) + -> I_LD2SP (VSIMD16,$2,$4,ra,ext) + | _ -> assert false + } +| LD2W zregs2 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 2) -> I_LD2SP (VSIMD32,$2,$4,ra,ext) + | _ -> assert false + } +| LD2D zregs2 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 3) + -> I_LD2SP (VSIMD64,$2,$4,ra,ext) + | _ -> assert false + } +| LD3B zregs3 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 0) + -> I_LD3SP (VSIMD8,$2,$4,ra,ext) + | _ -> assert false + } +| LD3H zregs3 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 1) + -> I_LD3SP (VSIMD16,$2,$4,ra,ext) + | _ -> assert false + } +| LD3W zregs3 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 2) + -> I_LD3SP (VSIMD32,$2,$4,ra,ext) + | _ -> assert false + } +| LD3D zregs3 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 3) + -> I_LD3SP (VSIMD64,$2,$4,ra,ext) + | _ -> assert false + } +| LD4B zregs4 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 0) + -> I_LD4SP (VSIMD8,$2,$4,ra,ext) + | _ -> assert false + } +| LD4H zregs4 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 1) + -> I_LD4SP (VSIMD16,$2,$4,ra,ext) + | _ -> assert false + } +| LD4W zregs4 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 2) + -> I_LD4SP (VSIMD32,$2,$4,ra,ext) + | _ -> assert false + } +| LD4D zregs4 COMMA pmreg_z COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 3) + -> I_LD4SP (VSIMD64,$2,$4,ra,ext) + | _ -> assert false + } +| ST1B zregs1 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 0) + | ZReg (_,LSL,MetaConst.Int 0) + | ZReg (_,UXTW,MetaConst.Int 0) + | ZReg (_,SXTW,MetaConst.Int 0) + -> I_ST1SP (VSIMD8,$2,$4,ra,ext) + | _ -> assert false + } +| ST1H zregs1 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 1) + | ZReg (_,LSL,MetaConst.Int 0) + | ZReg (_,UXTW,MetaConst.Int 0) + | ZReg (_,SXTW,MetaConst.Int 0) + | ZReg (_,LSL,MetaConst.Int 1) + | ZReg (_,UXTW,MetaConst.Int 1) + | ZReg (_,SXTW,MetaConst.Int 1) + -> I_ST1SP (VSIMD16,$2,$4,ra,ext) + | _ -> assert false + } +| ST1W zregs1 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 2) + | ZReg (_,LSL,MetaConst.Int 0) + | ZReg (_,UXTW,MetaConst.Int 0) + | ZReg (_,SXTW,MetaConst.Int 0) + | ZReg (_,LSL,MetaConst.Int 2) + | ZReg (_,UXTW,MetaConst.Int 2) + | ZReg (_,SXTW,MetaConst.Int 2) + -> I_ST1SP (VSIMD32,$2,$4,ra,ext) + | _ -> assert false + } +| ST1D zregs1 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 3) + | ZReg (_,LSL,MetaConst.Int 0) + | ZReg (_,UXTW,MetaConst.Int 0) + | ZReg (_,SXTW,MetaConst.Int 0) + | ZReg (_,LSL,MetaConst.Int 3) + | ZReg (_,UXTW,MetaConst.Int 3) + | ZReg (_,SXTW,MetaConst.Int 3) + -> I_ST1SP (VSIMD64,$2,$4,ra,ext) + | _ -> assert false + } +| ST2B zregs2 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 0) + -> I_ST2SP (VSIMD8,$2,$4,ra,ext) + | _ -> assert false + } +| ST2H zregs2 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 1) + -> I_ST2SP (VSIMD16,$2,$4,ra,ext) + | _ -> assert false + } +| ST2W zregs2 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 2) + -> I_ST2SP (VSIMD32,$2,$4,ra,ext) + | _ -> assert false + } +| ST2D zregs2 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 3) + -> I_ST2SP (VSIMD64,$2,$4,ra,ext) + | _ -> assert false + } +| ST3B zregs3 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 0) + -> I_ST3SP (VSIMD8,$2,$4,ra,ext) + | _ -> assert false + } +| ST3H zregs3 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 1) + -> I_ST3SP (VSIMD16,$2,$4,ra,ext) + | _ -> assert false + } +| ST3W zregs3 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 2) + -> I_ST3SP (VSIMD32,$2,$4,ra,ext) + | _ -> assert false + } +| ST3D zregs3 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 3) + -> I_ST3SP (VSIMD64,$2,$4,ra,ext) + | _ -> assert false + } +| ST4B zregs4 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 0) + -> I_ST4SP (VSIMD8,$2,$4,ra,ext) + | _ -> assert false + } +| ST4H zregs4 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 1) + -> I_ST4SP (VSIMD16,$2,$4,ra,ext) + | _ -> assert false + } +| ST4W zregs4 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 2) + -> I_ST4SP (VSIMD32,$2,$4,ra,ext) + | _ -> assert false + } +| ST4D zregs4 COMMA preg COMMA mem_ea + { let open MemExt in + let (ra,ext) = $6 in + match ext with + | Imm (_,Idx) + | Reg (V64,_,LSL,MetaConst.Int 3) + -> I_ST4SP (VSIMD64,$2,$4,ra,ext) + | _ -> assert false + } +| MOV zreg COMMA k + { I_MOV_SV ($2, $4, S_NOEXT) } +| MOV zreg COMMA k COMMA shift + { I_MOV_SV ($2, $4, $6) } +| DUP zreg COMMA wxreg + { let v,r = $4 in + I_DUP_SV ($2 , v, r) } +| TOK_ADD zreg COMMA zreg COMMA zreg + { I_ADD_SV ($2,$4,$6) } +| TOK_INDEX zreg COMMA xreg COMMA k + { match $2 with + | Zreg(_,64) -> I_INDEX_SI ($2,V64,$4,$6) + | _ -> assert false } +| TOK_INDEX zreg COMMA wreg COMMA k + { match $2 with + | Zreg(_,8) + | Zreg(_,16) + | Zreg(_,32) -> I_INDEX_SI ($2,V32,$4,$6) + | _ -> assert false } +| TOK_INDEX zreg COMMA k COMMA xreg + { match $6 with + | Zreg(_,64) -> I_INDEX_IS ($2,V64,$4,$6) + | _ -> assert false } +| TOK_INDEX zreg COMMA k COMMA wreg + { match $6 with + | Zreg(_,8) + | Zreg(_,16) + | Zreg(_,32) -> I_INDEX_IS ($2,V32,$4,$6) + | _ -> assert false } +| TOK_INDEX zreg COMMA xreg COMMA xreg + { match $2 with + | Zreg(_,64) -> I_INDEX_SS ($2,V64,$4,$6) + | _ -> assert false } +| TOK_INDEX zreg COMMA wreg COMMA wreg + { match $2 with + | Zreg(_,8) + | Zreg(_,16) + | Zreg(_,32) -> I_INDEX_SS ($2,V32,$4,$6) + | _ -> assert false } +| TOK_INDEX zreg COMMA k COMMA k + { I_INDEX_II ($2,$4,$6) } +| PTRUE preg_sz pattern + { I_PTRUE ($2,$3) } /* Compare and swap */ | CAS wreg COMMA wreg COMMA LBRK cxreg zeroopt RBRK { I_CAS (V32,RMW_P,$2,$4,$7) } diff --git a/lib/CType.ml b/lib/CType.ml index 6f57fe8fb9..d9e9f5a51e 100644 --- a/lib/CType.ml +++ b/lib/CType.ml @@ -32,6 +32,8 @@ let voidstar = Pointer void let word = Base "int" let quad = Base "int64_t" let int32x4_t = Base "int32x4_t" +let svbool_t = Base "svbool_t" +let svint32_t = Base "svint32_t" let pteval_t = Base "pteval_t" let pte = Pointer pteval_t let ins_t = Base "ins_t" diff --git a/lib/CType.mli b/lib/CType.mli index c34200ce12..79dd35d752 100644 --- a/lib/CType.mli +++ b/lib/CType.mli @@ -32,6 +32,8 @@ val voidstar : t val word : t val quad : t val int32x4_t : t +val svbool_t : t +val svint32_t : t val pteval_t : t val pte : t val ins_t : t diff --git a/litmus/AArch64Arch_litmus.ml b/litmus/AArch64Arch_litmus.ml index af79fd059d..4a7417ccb1 100644 --- a/litmus/AArch64Arch_litmus.ml +++ b/litmus/AArch64Arch_litmus.ml @@ -36,6 +36,12 @@ module Make(O:Arch_litmus.Config)(V:Constant.S) = struct | Internal i -> sprintf "i%i" i | Vreg (vr, _) | SIMDreg vr -> (try Misc.lowercase (List.assoc vr vvrs) with Not_found -> assert false) + | Preg (pr, _) -> + (try Misc.lowercase (List.assoc pr pvrs) with Not_found -> assert false) + | PMreg (pr, _) -> + (try Misc.lowercase (List.assoc pr pvrs) with Not_found -> assert false) + | Zreg (zr, _) -> + (try Misc.lowercase (List.assoc zr vvrs) with Not_found -> assert false) (*Intentionally 'vvrs' instead of 'zvrs'*) | _ -> try Misc.lowercase (Hashtbl.find tab r) with Not_found -> assert false @@ -62,18 +68,23 @@ module Make(O:Arch_litmus.Config)(V:Constant.S) = struct in match r with | (Vreg _ | SIMDreg _ ) -> Some (sprintf "vdupq_n_s32(%s)" v, "int32x4_t") + | Zreg _ -> Some (sprintf "svdup_s32(%s)" v, "svint32_t") + | Preg _ | PMreg _ -> Some (sprintf "svdup_b32(%s)" v, "svbool_t") | _ -> None let reg_class reg = match reg with - | Vreg _ | SIMDreg _ -> "=&w" + | Vreg _ | SIMDreg _ | Zreg _ -> "=&w" + | Preg _ | PMreg _ -> "=&Upa" | _ -> "=&r" let reg_class_stable reg = match reg with (* Certain Neon instructions do not affect the whole register, so we need to guarantee that unaffected parts are initialized to zero which basically means that we need to initialize whole register to zero. Several options have been evaluated and it seems the only robust way to achieve that is using the - constraint "+" and the explicit initialization of the 'stable_*' variables *) - | Vreg _ | SIMDreg _ -> "+w" + constraint "+" and the explicit initialization of the 'stable_*' variables. + The same applies to SVE instruction with P/M (merging predicate) *) + | Vreg _ | SIMDreg _ | Zreg _ -> "+w" + | Preg _ | PMreg _ -> "=Upa" | _ -> "=r" let comment = comment diff --git a/litmus/AArch64Compile_litmus.ml b/litmus/AArch64Compile_litmus.ml index fdec62fc65..4fa135bf6d 100644 --- a/litmus/AArch64Compile_litmus.ml +++ b/litmus/AArch64Compile_litmus.ml @@ -49,6 +49,14 @@ module Make(V:Constant.S)(C:Config) = let stable_regs _ins = match _ins with | I_LDAP1 (rs,_,_,_) + | I_LD1SP (_,rs,_,_,_) + | I_LD2SP (_,rs,_,_,_) + | I_LD3SP (_,rs,_,_,_) + | I_LD4SP (_,rs,_,_,_) + | I_ST1SP (_,rs,_,_,_) + | I_ST2SP (_,rs,_,_,_) + | I_ST3SP (_,rs,_,_,_) + | I_ST4SP (_,rs,_,_,_) | I_LD1 (rs,_,_,_) | I_LD1M (rs,_,_) | I_LD2 (rs,_,_,_) | I_LD2M (rs,_,_) | I_LD2R (rs,_,_) | I_LD3 (rs,_,_,_) | I_LD3M (rs,_,_) | I_LD3R (rs,_,_) @@ -60,6 +68,12 @@ module Make(V:Constant.S)(C:Config) = A.RegSet.of_list rs | I_MOV_FG (r,_,_,_) -> A.RegSet.of_list [r] | I_MOV_VE (r,_,_,_) -> A.RegSet.of_list [r] + (* To allow view of Scalable register via Neon one *) + | I_ADD_SV (r,_,_) + | I_DUP_SV (r,_,_) + | I_MOV_SV (r,_,_) + | I_INDEX_SI (r,_,_,_) | I_INDEX_IS (r,_,_,_) | I_INDEX_SS (r,_,_,_) | I_INDEX_II (r,_,_) + -> A.RegSet.of_list [r] | _ -> A.RegSet.empty (* Generic funs for zr *) @@ -135,6 +149,8 @@ module Make(V:Constant.S)(C:Config) = let add_p = add_type voidstar (* pointer *) let add_128 = add_type int32x4_t let add_v v = v2type v |> add_type + let add_svbool_t = add_type svbool_t + let add_svint32_t = add_type svint32_t (* pretty prints barrel shifters *) let pp_shifter = function @@ -305,6 +321,7 @@ module Make(V:Constant.S)(C:Config) = inputs=[rA]@i; outputs=[rD]; reg_env=add_v v i@[(rA,voidstar); (rD,td);]; } + | _ -> assert false let load memo v = match v with @@ -514,6 +531,7 @@ module Make(V:Constant.S)(C:Config) = memo fA fB fC (pp_mem_sext se) k; inputs=rA@[rB]@rC; reg_env=[(rB,voidstar)]@add_v v rA@add_v vC rC; } + | _ -> assert false let stxr memo v r1 r2 r3 = match v with | V32 -> @@ -918,6 +936,322 @@ module Make(V:Constant.S)(C:Config) = outputs = [r1]; reg_env = (add_128 [r1;r2;r3;])} +(* Scalable vector Extension *) + let pp_pattern = function + | POW2 -> "pow2" + | VL1 -> "vl1" + | VL2 -> "vl2" + | VL3 -> "vl3" + | VL4 -> "vl4" + | VL5 -> "vl5" + | VL6 -> "vl6" + | VL7 -> "vl7" + | VL8 -> "vl8" + | VL16 -> "vl16" + | VL32 -> "vl32" + | VL64 -> "vl64" + | VL128 -> "vl128" + | VL256 -> "vl256" + | MUL4 -> "mul4" + | MUL3 -> "mul3" + | ALL -> "all" + + let print_preg io offset i r = match r with + | Preg (_,s) -> "^" ^ io ^ string_of_int (i+offset) ^ + (try Misc.lowercase (List.assoc s sve_arrange_specifier) with Not_found -> assert false) + | PMreg (_,s) -> "^" ^ io ^ string_of_int (i+offset) ^ + (try Misc.lowercase (List.assoc s sve_pred_modifier) with Not_found -> assert false) + | _ -> assert false + + let print_zreg io offset i r = match r with + | Zreg (_,s) -> "^" ^ io ^ string_of_int (i+offset) ^ + (try Misc.lowercase (List.assoc s sve_arrange_specifier) with Not_found -> assert false) + | _ -> assert false + + let while_op memo r1 v r2 r3 = + { empty_ins with + memo = sprintf "%s %s,%s" + memo + (print_preg "o" 0 0 r1) + (match v with | V32 -> "^wi0, ^wi1" | V64 -> "^i0, ^i1" | V128 -> assert false); + inputs = [r2;r3;]; + outputs = [r1]; + reg_env = (add_svbool_t [r1;])@(add_w [r2;r3])} + + let uaddv v r1 r2 r3 = + { empty_ins with + memo = sprintf "uaddv %s,%s,%s" + (print_vecreg v "o" 0) + (print_preg "i" 0 0 r2) + (print_zreg "i" 0 1 r3); + inputs = [r2;r3;]; + outputs = [r1]; + reg_env = (add_128 [r1;])@(add_svbool_t [r2;])@(add_svint32_t [r3;])} + + let ldnsp memo v rs pg rA idx = + let open MemExt in + match idx with + | Imm (0,Idx) -> + { empty_ins with + memo = sprintf "%s%s {%s},%s,[^i1]" + memo + (Misc.lowercase (pp_simd_variant v)) + (String.concat "," (List.mapi (print_zreg "o" 0) rs)) + (print_preg "i" 0 0 pg); + inputs = [pg;rA]; + outputs = rs; + reg_env = (add_svint32_t rs)@(add_svbool_t [pg;])@[(rA,voidstar)]} + | Imm (k,Idx) -> + { empty_ins with + memo = sprintf "%s%s {%s},%s,[^i1,#%i,mul vl]" + memo + (Misc.lowercase (pp_simd_variant v)) + (String.concat "," (List.mapi (print_zreg "o" 0) rs)) + (print_preg "i" 0 0 pg) + k; + inputs = [pg;rA]; + outputs = rs; + reg_env = (add_svint32_t rs)@(add_svbool_t [pg;])@[(rA,voidstar)]} + | Reg (V64,rM,LSL,0) -> + { empty_ins with + memo = sprintf "%s%s {%s},%s,[^i1,^i2]" + memo + (Misc.lowercase (pp_simd_variant v)) + (String.concat "," (List.mapi (print_zreg "o" 0) rs)) + (print_preg "i" 0 0 pg); + inputs = [pg;rA;rM]; + outputs = rs; + reg_env = (add_svint32_t rs)@(add_svbool_t [pg;])@[(rA,voidstar)]@(add_q [rM;])} + | Reg (V64,rM,LSL,k) -> + { empty_ins with + memo = sprintf "%s%s {%s},%s,[^i1,^i2,LSL #%d]" + memo + (Misc.lowercase (pp_simd_variant v)) + (String.concat "," (List.mapi (print_zreg "o" 0) rs)) + (print_preg "i" 0 0 pg) + k; + inputs = [pg;rA;rM]; + outputs = rs; + reg_env = (add_svint32_t rs)@(add_svbool_t [pg;])@[(rA,voidstar)]@(add_q [rM;])} + | ZReg(rM,LSL,0) -> + let r = List.hd rs in + { empty_ins with + memo = sprintf "%s%s {%s},%s,[^i1,%s]" + memo + (Misc.lowercase (pp_simd_variant v)) + (print_zreg "o" 0 0 r) + (print_preg "i" 0 0 pg) + (print_zreg "i" 0 2 rM); + inputs = [pg;rA;rM]; + outputs = [r]; + reg_env = (add_svint32_t [r;rM])@(add_svbool_t [pg;])@[(rA,voidstar)]} + | ZReg(rM,se,0) -> + let r = List.hd rs in + { empty_ins with + memo = sprintf "%s%s {%s},%s,[^i1,%s,%s]" + memo + (Misc.lowercase (pp_simd_variant v)) + (print_zreg "o" 0 0 r) + (print_preg "i" 0 0 pg) + (print_zreg "i" 0 2 rM) + (pp_mem_sext se); + inputs = [pg;rA;rM]; + outputs = [r]; + reg_env = (add_svint32_t [r;rM])@(add_svbool_t [pg;])@[(rA,voidstar)]} + | ZReg(rM,se,k) -> + let r = List.hd rs in + { empty_ins with + memo = sprintf "%s%s {%s},%s,[^i1,%s,%s #%d]" + memo + (Misc.lowercase (pp_simd_variant v)) + (print_zreg "o" 0 0 r) + (print_preg "i" 0 0 pg) + (print_zreg "i" 0 2 rM) + (pp_mem_sext se) + k; + inputs = [pg;rA;rM]; + outputs = [r]; + reg_env = (add_svint32_t [r;rM])@(add_svbool_t [pg;])@[(rA,voidstar)]} + | _ -> assert false + + let stnsp memo v rs pg rA idx = + let open MemExt in + match idx with + | Imm (0,Idx) -> + { empty_ins with + memo = sprintf "%s%s {%s},%s,[^i1]" + memo + (Misc.lowercase (pp_simd_variant v)) + (String.concat "," (List.mapi (print_zreg "i" 2) rs)) + (print_preg "i" 0 0 pg); + inputs = [pg;rA]@rs; + outputs = []; + reg_env = (add_svint32_t rs)@(add_svbool_t [pg;])@[(rA,voidstar)]} + | Imm (k,Idx) -> + { empty_ins with + memo = sprintf "%s%s {%s},%s,[^i1,#%i,mul vl]" + memo + (Misc.lowercase (pp_simd_variant v)) + (String.concat "," (List.mapi (print_zreg "i" 2) rs)) + (print_preg "i" 0 0 pg) + k; + inputs = [pg;rA]@rs; + outputs = []; + reg_env = (add_svint32_t rs)@(add_svbool_t [pg;])@[(rA,voidstar)]} + | Reg (V64,rM,LSL,0) -> + { empty_ins with + memo = sprintf "%s%s {%s},%s,[^i1,^i2]" + memo + (Misc.lowercase (pp_simd_variant v)) + (String.concat "," (List.mapi (print_zreg "i" 3) rs)) + (print_preg "i" 0 0 pg); + inputs = [pg;rA;rM]@rs; + outputs = []; + reg_env = (add_svint32_t rs)@(add_svbool_t [pg;])@[(rA,voidstar)]@(add_q [rM;])} + | Reg (V64,rM,LSL,k) -> + { empty_ins with + memo = sprintf "%s%s {%s},%s,[^i1,^i2,LSL #%d]" + memo + (Misc.lowercase (pp_simd_variant v)) + (String.concat "," (List.mapi (print_zreg "i" 3) rs)) + (print_preg "i" 0 0 pg) + k; + inputs = [pg;rA;rM]@rs; + outputs = []; + reg_env = (add_svint32_t rs)@(add_svbool_t [pg;])@[(rA,voidstar)]@(add_q [rM;])} + | ZReg(rM,LSL,0) -> + let r = List.hd rs in + { empty_ins with + memo = sprintf "%s%s {%s},%s,[^i2,%s]" + memo + (Misc.lowercase (pp_simd_variant v)) + (print_zreg "i" 0 0 r) + (print_preg "i" 0 1 pg) + (print_zreg "i" 0 3 rM); + inputs = [r;pg;rA;rM]; + outputs = []; + reg_env = (add_svint32_t [r;rM])@(add_svbool_t [pg;])@[(rA,voidstar)]} + | ZReg(rM,se,0) -> + let r = List.hd rs in + { empty_ins with + memo = sprintf "%s%s {%s},%s,[^i2,%s,%s]" + memo + (Misc.lowercase (pp_simd_variant v)) + (print_zreg "i" 0 0 r) + (print_preg "i" 0 1 pg) + (print_zreg "i" 0 3 rM) + (pp_mem_sext se); + inputs = [r;pg;rA;rM]; + outputs = []; + reg_env = (add_svint32_t [r;rM])@(add_svbool_t [pg;])@[(rA,voidstar)]} + | ZReg(rM,se,k) -> + let r = List.hd rs in + { empty_ins with + memo = sprintf "%s%s {%s},%s,[^i2,%s,%s #%d]" + memo + (Misc.lowercase (pp_simd_variant v)) + (print_zreg "i" 0 0 r) + (print_preg "i" 0 1 pg) + (print_zreg "i" 0 3 rM) + (pp_mem_sext se) + k; + inputs = [r;pg;rA;rM]; + outputs = []; + reg_env = (add_svint32_t [r;rM])@(add_svbool_t [pg;])@[(rA,voidstar)]} + | _ -> assert false + + let mov_sv r k s = match s with + | S_NOEXT -> + { empty_ins with + memo = sprintf "mov %s,#%i" (print_zreg "o" 0 0 r) k; + inputs = []; + outputs = [r;]; + reg_env = (add_svint32_t [r])} + | S_LSL(ks) -> + { empty_ins with + memo = sprintf "mov %s,#%i,%s" (print_zreg "o" 0 0 r) k (pp_shifter (S_LSL ks)); + inputs = []; + outputs = [r;]; + reg_env = (add_svint32_t [r])} + | _ -> assert false + + let dup_sv_v r1 v r2 = + { empty_ins with + memo = sprintf "dup %s,%s" + (print_zreg "o" 0 0 r1) + (match v with | V32 -> "^wi0" | V64 -> "^i0" | V128 -> assert false); + inputs = [r2]; + outputs = [r1]; + reg_env = (add_svint32_t [r1;]) @ (( + match v with + | V32 -> add_w + | V64 -> add_q + | V128 -> assert false) [r2;])} + + let add_sv r1 r2 r3 = + { empty_ins with + memo = sprintf "add %s,%s,%s" (print_zreg "o" 0 0 r1) (print_zreg "i" 0 0 r2) (print_zreg "i" 0 1 r3); + inputs = [r2;r3]; + outputs = [r1]; + reg_env = (add_svint32_t [r1;r2;r3;])} + + let index_si r1 v r2 k = + { empty_ins with + memo = sprintf "index %s,%s,#%i" + (print_zreg "o" 0 0 r1) + (match v with | V32 -> "^wi0" | V64 -> "^i0" | V128 -> assert false) + k; + inputs = [r2]; + outputs = [r1]; + reg_env = (add_svint32_t [r1;]) @ (( + match v with + | V32 -> add_w + | V64 -> add_q + | V128 -> assert false) [r2;])} + + let index_is r1 v k r2 = + { empty_ins with + memo = sprintf "index %s,#%i,%s" + (print_zreg "o" 0 0 r1) + k + (match v with | V32 -> "^wi0" | V64 -> "^i0" | V128 -> assert false); + inputs = [r2]; + outputs = [r1]; + reg_env = (add_svint32_t [r1;]) @ (( + match v with + | V32 -> add_w + | V64 -> add_q + | V128 -> assert false) [r2;])} + + let index_ss r1 v r2 r3 = + { empty_ins with + memo = sprintf "index %s,%s" + (print_zreg "o" 0 0 r1) + (match v with | V32 -> "^wi0,^wi1" | V64 -> "^i0,^i1" | V128 -> assert false); + inputs = [r2;r3]; + outputs = [r1]; + reg_env = (add_svint32_t [r1;]) @ (( + match v with + | V32 -> add_w + | V64 -> add_q + | V128 -> assert false) [r2;r3])} + + + let index_ii r1 k1 k2 = + { empty_ins with + memo = sprintf "index %s,#%i,#%i" + (print_zreg "o" 0 0 r1) k1 k2; + inputs = []; + outputs = [r1]; + reg_env = (add_svint32_t [r1;])} + + let ptrue pred pattern = + { empty_ins with + memo = sprintf "ptrue %s,%s" (print_preg "o" 0 0 pred) (pp_pattern pattern); + inputs = []; + outputs = [pred]; + reg_env = (add_svbool_t [pred;])} + (* Compare and swap *) let cas_memo rmw = Misc.lowercase (cas_memo rmw) @@ -1332,6 +1666,28 @@ module Make(V:Constant.S)(C:Config) = | I_EOR_SIMD (r1,r2,r3) -> eor_simd r1 r2 r3::k | I_ADD_SIMD (r1,r2,r3) -> add_simd r1 r2 r3::k | I_ADD_SIMD_S (r1,r2,r3) -> add_simd_s r1 r2 r3::k +(* Scalable Vector Extension Load and Store *) + | I_WHILELT (z,v,r1,r2) -> while_op "whilelt" z v r1 r2::k + | I_WHILELE (z,v,r1,r2) -> while_op "whilele" z v r1 r2::k + | I_WHILELO (z,v,r1,r2) -> while_op "whilelo" z v r1 r2::k + | I_WHILELS (z,v,r1,r2) -> while_op "whilels" z v r1 r2::k + | I_UADDV (v,r1,r2,r3) -> uaddv v r1 r2 r3::k + | I_LD1SP (v,rs,r2,r3,idx) -> ldnsp "ld1" v rs r2 r3 idx::k + | I_LD2SP (v,rs,r2,r3,idx) -> ldnsp "ld2" v rs r2 r3 idx::k + | I_LD3SP (v,rs,r2,r3,idx) -> ldnsp "ld3" v rs r2 r3 idx::k + | I_LD4SP (v,rs,r2,r3,idx) -> ldnsp "ld4" v rs r2 r3 idx::k + | I_ST1SP (v,rs,r2,r3,idx) -> stnsp "st1" v rs r2 r3 idx::k + | I_ST2SP (v,rs,r2,r3,idx) -> stnsp "st2" v rs r2 r3 idx::k + | I_ST3SP (v,rs,r2,r3,idx) -> stnsp "st3" v rs r2 r3 idx::k + | I_ST4SP (v,rs,r2,r3,idx) -> stnsp "st4" v rs r2 r3 idx::k + | I_MOV_SV (r,kr,s) -> mov_sv r kr s::k + | I_DUP_SV (r1,v,r2) -> dup_sv_v r1 v r2::k + | I_ADD_SV (r1,r2,r3) -> add_sv r1 r2 r3::k + | I_INDEX_SI (r1,v,r2,kr) -> index_si r1 v r2 kr::k + | I_INDEX_IS (r1,v,kr,r2) -> index_is r1 v kr r2::k + | I_INDEX_SS (r1,v,r2,r3) -> index_ss r1 v r2 r3::k + | I_INDEX_II (r1,k1,k2) -> index_ii r1 k1 k2::k + | I_PTRUE (pred,pat) -> ptrue pred pat::k (* Arithmetic *) | I_MOV (v,r,K i) -> mov_const v r i::k | I_MOV (v,r1,RV (_,r2)) -> movr v r1 r2::k diff --git a/litmus/ASMLang.ml b/litmus/ASMLang.ml index bb57b5fc43..d983d57e67 100644 --- a/litmus/ASMLang.ml +++ b/litmus/ASMLang.ml @@ -339,7 +339,7 @@ module RegMap = A.RegMap) with Not_found -> Compile.base in match ty with | CType.Array _ when A.arch = `AArch64 -> - fprintf chan "%s%s out_%s = %s;\n" indent + fprintf chan "%s%s out_%s = cast(%s);\n" indent (CType.dump CType.int32x4_t) (dump_stable_reg reg) (dump_stable_reg reg); fprintf chan "%smemcpy(%s, &out_%s, sizeof(%s));\n" indent (compile_out_reg proc reg) (dump_stable_reg reg) (CType.dump ty) diff --git a/litmus/libdir/_aarch64/intrinsics.h b/litmus/libdir/_aarch64/intrinsics.h index f603817d56..1859576b23 100644 --- a/litmus/libdir/_aarch64/intrinsics.h +++ b/litmus/libdir/_aarch64/intrinsics.h @@ -1,3 +1,13 @@ #ifdef __ARM_NEON #include +#define cast(v) \ +({ \ + typeof(v) __in = (v); \ + int32x4_t __out; \ + asm("" : "=w"(__out) : "0"(__in)); \ + __out; \ +}) #endif /* __ARM_NEON */ +#ifdef __ARM_FEATURE_SVE +#include +#endif /* __ARM_FEATURE_SVE */ diff --git a/litmus/libdir/armv8+sve-qemu.cfg b/litmus/libdir/armv8+sve-qemu.cfg new file mode 100644 index 0000000000..22eb4cc203 --- /dev/null +++ b/litmus/libdir/armv8+sve-qemu.cfg @@ -0,0 +1,13 @@ +size_of_test = 10k +number_of_run = 100 +avail = 4 +limit = true +memory = direct +stride = 1 +carch = AArch64 +barrier = userfence +smt = 4 +smt_mode = seq +affinity = incr0 +ccopts = -O2 -march=armv8-a+sve +crossrun = qemu:qemu-aarch64 diff --git a/litmus/template.ml b/litmus/template.ml index 75e9a2b10d..3658c2745d 100644 --- a/litmus/template.ml +++ b/litmus/template.ml @@ -349,7 +349,7 @@ module Make(O:Config)(A:I) = let digit i = let c = Char.code t.memo.[i] in let n = c - Char.code '0' in - if 0 <= n && n <= 5 then n + if 0 <= n && n <= 6 then n else internal (sprintf "bad digit '%i' (%c)" n t.memo.[i]) and substring i j =