Skip to content

Latest commit

 

History

History
911 lines (881 loc) · 72.8 KB

intrinsics.adoc

File metadata and controls

911 lines (881 loc) · 72.8 KB

Intrinsic support for XTheadVector

Introduction

The RISC-V XTheadVector C intrinsics provide users with C-language-level interfaces to directly leverage XTheadVector ([xtheadvector]), with the compiler assisting in instruction scheduling and register allocation.

The XTheadVector intrinsics support all RVV intrinsic functions, with the restriction that fractional LMUL values are not supported (https://github.com/riscv-non-isa/rvv-intrinsic-doc). Additionally, the XTheadVector extension intrinsics API extends the RVV intrinsics API with new load/store functions and integer extract functions (Additional Intrinsic Functions for XTheadVector).

Test macro

The __riscv_th_v_intrinsic macro is the C macro to test the compiler’s support for the RISC-V XTheadVector extension intrinsics. The API extension is optional and targets the vendor extension XTheadVector.

The value of the test macro is defined as its version, which is computed using the following formula. The formula is identical to what is defined in the RISC-V C API specification (https://github.com/riscv-non-isa/rvv-intrinsic-doc).

<MAJOR_VERSION> * 1,000,000 + <MINOR_VERSION> * 1,000 + <REVISION_VERSION>

For example, the v1.0 version should define the macro with value 1000000.

Availability

With <riscv_th_vector.h> included, availability of intrinsic variants depends on the required architecture of their corresponding XTheadVector instructions. The supported architecture is specified to the compiler using the -march option.

We suggest guarding the inclusion with the test macro.

#if defined(__riscv_xtheadvector) && defined(__riscv_th_v_intrinsic)
#include <riscv_th_vector.h>
#elif defined (__riscv_vector) && defined (__riscv_v_intrinsic)
#include <riscv_vector.h>
#else
#error This file requires either RVV intrinsics or XTheadVector intrinsics
#endif

Example

The following example uses the vlb intrinsic with a vint32m1_t output vector type, the vsb intrinsic with a vint32m1_t input vector type, and the vadd intrinsic with vint32m1_t input and output vector types.

// Usage example: load through `in` with the vlb intrinsics, add the loaded
// vector to itself with the vadd intrinsics, and store the result through
// `out` with the vsb intrinsic.  All vectors are vint32m1_t and every call
// passes 4 as its final (vl) argument.
void function (void *in, void *out)
{
    // Plain load, then the _tu variant, which takes the previously loaded
    // vector as an extra leading operand.
    vint32m1_t ld = __riscv_th_vlb_v_i32m1 (in, 4);
    vint32m1_t ld_tu = __riscv_th_vlb_v_i32m1_tu (ld, in, 4);

    // Same pairing for vadd: plain form first, then the _tu variant with
    // the previous sum as its extra leading operand.
    vint32m1_t sum = __riscv_vadd_vv_i32m1 (ld_tu, ld_tu, 4);
    vint32m1_t sum_tu = __riscv_vadd_vv_i32m1_tu (sum, ld_tu, ld_tu, 4);

    __riscv_th_vsb_v_i32m1 (out, sum_tu, 4);
}

Additional Intrinsic Functions for XTheadVector

XTheadVector Unit-Stride Load Intrinsics
vint8m1_t __riscv_th_vlb_v_i8m1 (const int8_t *a, size_t vl);
vint8m2_t __riscv_th_vlb_v_i8m2 (const int8_t *a, size_t vl);
vint8m4_t __riscv_th_vlb_v_i8m4 (const int8_t *a, size_t vl);
vint8m8_t __riscv_th_vlb_v_i8m8 (const int8_t *a, size_t vl);
vint16m1_t __riscv_th_vlb_v_i16m1 (const int16_t *a, size_t vl);
vint16m2_t __riscv_th_vlb_v_i16m2 (const int16_t *a, size_t vl);
vint16m4_t __riscv_th_vlb_v_i16m4 (const int16_t *a, size_t vl);
vint16m8_t __riscv_th_vlb_v_i16m8 (const int16_t *a, size_t vl);
vint32m1_t __riscv_th_vlb_v_i32m1 (const int32_t *a, size_t vl);
vint32m2_t __riscv_th_vlb_v_i32m2 (const int32_t *a, size_t vl);
vint32m4_t __riscv_th_vlb_v_i32m4 (const int32_t *a, size_t vl);
vint32m8_t __riscv_th_vlb_v_i32m8 (const int32_t *a, size_t vl);
vint64m1_t __riscv_th_vlb_v_i64m1 (const int64_t *a, size_t vl);
vint64m2_t __riscv_th_vlb_v_i64m2 (const int64_t *a, size_t vl);
vint64m4_t __riscv_th_vlb_v_i64m4 (const int64_t *a, size_t vl);
vint64m8_t __riscv_th_vlb_v_i64m8 (const int64_t *a, size_t vl);
vint16m1_t __riscv_th_vlh_v_i16m1 (const int16_t *a, size_t vl);
vint16m2_t __riscv_th_vlh_v_i16m2 (const int16_t *a, size_t vl);
vint16m4_t __riscv_th_vlh_v_i16m4 (const int16_t *a, size_t vl);
vint16m8_t __riscv_th_vlh_v_i16m8 (const int16_t *a, size_t vl);
vint32m1_t __riscv_th_vlh_v_i32m1 (const int32_t *a, size_t vl);
vint32m2_t __riscv_th_vlh_v_i32m2 (const int32_t *a, size_t vl);
vint32m4_t __riscv_th_vlh_v_i32m4 (const int32_t *a, size_t vl);
vint32m8_t __riscv_th_vlh_v_i32m8 (const int32_t *a, size_t vl);
vint64m1_t __riscv_th_vlh_v_i64m1 (const int64_t *a, size_t vl);
vint64m2_t __riscv_th_vlh_v_i64m2 (const int64_t *a, size_t vl);
vint64m4_t __riscv_th_vlh_v_i64m4 (const int64_t *a, size_t vl);
vint64m8_t __riscv_th_vlh_v_i64m8 (const int64_t *a, size_t vl);
vint32m1_t __riscv_th_vlw_v_i32m1 (const int32_t *a, size_t vl);
vint32m2_t __riscv_th_vlw_v_i32m2 (const int32_t *a, size_t vl);
vint32m4_t __riscv_th_vlw_v_i32m4 (const int32_t *a, size_t vl);
vint32m8_t __riscv_th_vlw_v_i32m8 (const int32_t *a, size_t vl);
vint64m1_t __riscv_th_vlw_v_i64m1 (const int64_t *a, size_t vl);
vint64m2_t __riscv_th_vlw_v_i64m2 (const int64_t *a, size_t vl);
vint64m4_t __riscv_th_vlw_v_i64m4 (const int64_t *a, size_t vl);
vint64m8_t __riscv_th_vlw_v_i64m8 (const int64_t *a, size_t vl);
vuint8m1_t __riscv_th_vlbu_v_u8m1 (const uint8_t *a, size_t vl);
vuint8m2_t __riscv_th_vlbu_v_u8m2 (const uint8_t *a, size_t vl);
vuint8m4_t __riscv_th_vlbu_v_u8m4 (const uint8_t *a, size_t vl);
vuint8m8_t __riscv_th_vlbu_v_u8m8 (const uint8_t *a, size_t vl);
vuint16m1_t __riscv_th_vlbu_v_u16m1 (const uint16_t *a, size_t vl);
vuint16m2_t __riscv_th_vlbu_v_u16m2 (const uint16_t *a, size_t vl);
vuint16m4_t __riscv_th_vlbu_v_u16m4 (const uint16_t *a, size_t vl);
vuint16m8_t __riscv_th_vlbu_v_u16m8 (const uint16_t *a, size_t vl);
vuint32m1_t __riscv_th_vlbu_v_u32m1 (const uint32_t *a, size_t vl);
vuint32m2_t __riscv_th_vlbu_v_u32m2 (const uint32_t *a, size_t vl);
vuint32m4_t __riscv_th_vlbu_v_u32m4 (const uint32_t *a, size_t vl);
vuint32m8_t __riscv_th_vlbu_v_u32m8 (const uint32_t *a, size_t vl);
vuint64m1_t __riscv_th_vlbu_v_u64m1 (const uint64_t *a, size_t vl);
vuint64m2_t __riscv_th_vlbu_v_u64m2 (const uint64_t *a, size_t vl);
vuint64m4_t __riscv_th_vlbu_v_u64m4 (const uint64_t *a, size_t vl);
vuint64m8_t __riscv_th_vlbu_v_u64m8 (const uint64_t *a, size_t vl);
vuint16m1_t __riscv_th_vlhu_v_u16m1 (const uint16_t *a, size_t vl);
vuint16m2_t __riscv_th_vlhu_v_u16m2 (const uint16_t *a, size_t vl);
vuint16m4_t __riscv_th_vlhu_v_u16m4 (const uint16_t *a, size_t vl);
vuint16m8_t __riscv_th_vlhu_v_u16m8 (const uint16_t *a, size_t vl);
vuint32m1_t __riscv_th_vlhu_v_u32m1 (const uint32_t *a, size_t vl);
vuint32m2_t __riscv_th_vlhu_v_u32m2 (const uint32_t *a, size_t vl);
vuint32m4_t __riscv_th_vlhu_v_u32m4 (const uint32_t *a, size_t vl);
vuint32m8_t __riscv_th_vlhu_v_u32m8 (const uint32_t *a, size_t vl);
vuint64m1_t __riscv_th_vlhu_v_u64m1 (const uint64_t *a, size_t vl);
vuint64m2_t __riscv_th_vlhu_v_u64m2 (const uint64_t *a, size_t vl);
vuint64m4_t __riscv_th_vlhu_v_u64m4 (const uint64_t *a, size_t vl);
vuint64m8_t __riscv_th_vlhu_v_u64m8 (const uint64_t *a, size_t vl);
vuint32m1_t __riscv_th_vlwu_v_u32m1 (const uint32_t *a, size_t vl);
vuint32m2_t __riscv_th_vlwu_v_u32m2 (const uint32_t *a, size_t vl);
vuint32m4_t __riscv_th_vlwu_v_u32m4 (const uint32_t *a, size_t vl);
vuint32m8_t __riscv_th_vlwu_v_u32m8 (const uint32_t *a, size_t vl);
vuint64m1_t __riscv_th_vlwu_v_u64m1 (const uint64_t *a, size_t vl);
vuint64m2_t __riscv_th_vlwu_v_u64m2 (const uint64_t *a, size_t vl);
vuint64m4_t __riscv_th_vlwu_v_u64m4 (const uint64_t *a, size_t vl);
vuint64m8_t __riscv_th_vlwu_v_u64m8 (const uint64_t *a, size_t vl);
// masked functions
vint8m1_t __riscv_th_vlb_v_i8m1_m (vbool8_t mask, const int8_t *a, size_t vl);
vint8m2_t __riscv_th_vlb_v_i8m2_m (vbool4_t mask, const int8_t *a, size_t vl);
vint8m4_t __riscv_th_vlb_v_i8m4_m (vbool2_t mask, const int8_t *a, size_t vl);
vint8m8_t __riscv_th_vlb_v_i8m8_m (vbool1_t mask, const int8_t *a, size_t vl);
vint16m1_t __riscv_th_vlb_v_i16m1_m (vbool16_t mask, const int16_t *a, size_t vl);
vint16m2_t __riscv_th_vlb_v_i16m2_m (vbool8_t mask, const int16_t *a, size_t vl);
vint16m4_t __riscv_th_vlb_v_i16m4_m (vbool4_t mask, const int16_t *a, size_t vl);
vint16m8_t __riscv_th_vlb_v_i16m8_m (vbool2_t mask, const int16_t *a, size_t vl);
vint32m1_t __riscv_th_vlb_v_i32m1_m (vbool32_t mask, const int32_t *a, size_t vl);
vint32m2_t __riscv_th_vlb_v_i32m2_m (vbool16_t mask, const int32_t *a, size_t vl);
vint32m4_t __riscv_th_vlb_v_i32m4_m (vbool8_t mask, const int32_t *a, size_t vl);
vint32m8_t __riscv_th_vlb_v_i32m8_m (vbool4_t mask, const int32_t *a, size_t vl);
vint64m1_t __riscv_th_vlb_v_i64m1_m (vbool64_t mask, const int64_t *a, size_t vl);
vint64m2_t __riscv_th_vlb_v_i64m2_m (vbool32_t mask, const int64_t *a, size_t vl);
vint64m4_t __riscv_th_vlb_v_i64m4_m (vbool16_t mask, const int64_t *a, size_t vl);
vint64m8_t __riscv_th_vlb_v_i64m8_m (vbool8_t mask, const int64_t *a, size_t vl);
vint16m1_t __riscv_th_vlh_v_i16m1_m (vbool16_t mask, const int16_t *a, size_t vl);
vint16m2_t __riscv_th_vlh_v_i16m2_m (vbool8_t mask, const int16_t *a, size_t vl);
vint16m4_t __riscv_th_vlh_v_i16m4_m (vbool4_t mask, const int16_t *a, size_t vl);
vint16m8_t __riscv_th_vlh_v_i16m8_m (vbool2_t mask, const int16_t *a, size_t vl);
vint32m1_t __riscv_th_vlh_v_i32m1_m (vbool32_t mask, const int32_t *a, size_t vl);
vint32m2_t __riscv_th_vlh_v_i32m2_m (vbool16_t mask, const int32_t *a, size_t vl);
vint32m4_t __riscv_th_vlh_v_i32m4_m (vbool8_t mask, const int32_t *a, size_t vl);
vint32m8_t __riscv_th_vlh_v_i32m8_m (vbool4_t mask, const int32_t *a, size_t vl);
vint64m1_t __riscv_th_vlh_v_i64m1_m (vbool64_t mask, const int64_t *a, size_t vl);
vint64m2_t __riscv_th_vlh_v_i64m2_m (vbool32_t mask, const int64_t *a, size_t vl);
vint64m4_t __riscv_th_vlh_v_i64m4_m (vbool16_t mask, const int64_t *a, size_t vl);
vint64m8_t __riscv_th_vlh_v_i64m8_m (vbool8_t mask, const int64_t *a, size_t vl);
vint32m1_t __riscv_th_vlw_v_i32m1_m (vbool32_t mask, const int32_t *a, size_t vl);
vint32m2_t __riscv_th_vlw_v_i32m2_m (vbool16_t mask, const int32_t *a, size_t vl);
vint32m4_t __riscv_th_vlw_v_i32m4_m (vbool8_t mask, const int32_t *a, size_t vl);
vint32m8_t __riscv_th_vlw_v_i32m8_m (vbool4_t mask, const int32_t *a, size_t vl);
vint64m1_t __riscv_th_vlw_v_i64m1_m (vbool64_t mask, const int64_t *a, size_t vl);
vint64m2_t __riscv_th_vlw_v_i64m2_m (vbool32_t mask, const int64_t *a, size_t vl);
vint64m4_t __riscv_th_vlw_v_i64m4_m (vbool16_t mask, const int64_t *a, size_t vl);
vint64m8_t __riscv_th_vlw_v_i64m8_m (vbool8_t mask, const int64_t *a, size_t vl);
vuint8m1_t __riscv_th_vlbu_v_u8m1_m (vbool8_t mask, const uint8_t *a, size_t vl);
vuint8m2_t __riscv_th_vlbu_v_u8m2_m (vbool4_t mask, const uint8_t *a, size_t vl);
vuint8m4_t __riscv_th_vlbu_v_u8m4_m (vbool2_t mask, const uint8_t *a, size_t vl);
vuint8m8_t __riscv_th_vlbu_v_u8m8_m (vbool1_t mask, const uint8_t *a, size_t vl);
vuint16m1_t __riscv_th_vlbu_v_u16m1_m (vbool16_t mask, const uint16_t *a, size_t vl);
vuint16m2_t __riscv_th_vlbu_v_u16m2_m (vbool8_t mask, const uint16_t *a, size_t vl);
vuint16m4_t __riscv_th_vlbu_v_u16m4_m (vbool4_t mask, const uint16_t *a, size_t vl);
vuint16m8_t __riscv_th_vlbu_v_u16m8_m (vbool2_t mask, const uint16_t *a, size_t vl);
vuint32m1_t __riscv_th_vlbu_v_u32m1_m (vbool32_t mask, const uint32_t *a, size_t vl);
vuint32m2_t __riscv_th_vlbu_v_u32m2_m (vbool16_t mask, const uint32_t *a, size_t vl);
vuint32m4_t __riscv_th_vlbu_v_u32m4_m (vbool8_t mask, const uint32_t *a, size_t vl);
vuint32m8_t __riscv_th_vlbu_v_u32m8_m (vbool4_t mask, const uint32_t *a, size_t vl);
vuint64m1_t __riscv_th_vlbu_v_u64m1_m (vbool64_t mask, const uint64_t *a, size_t vl);
vuint64m2_t __riscv_th_vlbu_v_u64m2_m (vbool32_t mask, const uint64_t *a, size_t vl);
vuint64m4_t __riscv_th_vlbu_v_u64m4_m (vbool16_t mask, const uint64_t *a, size_t vl);
vuint64m8_t __riscv_th_vlbu_v_u64m8_m (vbool8_t mask, const uint64_t *a, size_t vl);
vuint16m1_t __riscv_th_vlhu_v_u16m1_m (vbool16_t mask, const uint16_t *a, size_t vl);
vuint16m2_t __riscv_th_vlhu_v_u16m2_m (vbool8_t mask, const uint16_t *a, size_t vl);
vuint16m4_t __riscv_th_vlhu_v_u16m4_m (vbool4_t mask, const uint16_t *a, size_t vl);
vuint16m8_t __riscv_th_vlhu_v_u16m8_m (vbool2_t mask, const uint16_t *a, size_t vl);
vuint32m1_t __riscv_th_vlhu_v_u32m1_m (vbool32_t mask, const uint32_t *a, size_t vl);
vuint32m2_t __riscv_th_vlhu_v_u32m2_m (vbool16_t mask, const uint32_t *a, size_t vl);
vuint32m4_t __riscv_th_vlhu_v_u32m4_m (vbool8_t mask, const uint32_t *a, size_t vl);
vuint32m8_t __riscv_th_vlhu_v_u32m8_m (vbool4_t mask, const uint32_t *a, size_t vl);
vuint64m1_t __riscv_th_vlhu_v_u64m1_m (vbool64_t mask, const uint64_t *a, size_t vl);
vuint64m2_t __riscv_th_vlhu_v_u64m2_m (vbool32_t mask, const uint64_t *a, size_t vl);
vuint64m4_t __riscv_th_vlhu_v_u64m4_m (vbool16_t mask, const uint64_t *a, size_t vl);
vuint64m8_t __riscv_th_vlhu_v_u64m8_m (vbool8_t mask, const uint64_t *a, size_t vl);
vuint32m1_t __riscv_th_vlwu_v_u32m1_m (vbool32_t mask, const uint32_t *a, size_t vl);
vuint32m2_t __riscv_th_vlwu_v_u32m2_m (vbool16_t mask, const uint32_t *a, size_t vl);
vuint32m4_t __riscv_th_vlwu_v_u32m4_m (vbool8_t mask, const uint32_t *a, size_t vl);
vuint32m8_t __riscv_th_vlwu_v_u32m8_m (vbool4_t mask, const uint32_t *a, size_t vl);
vuint64m1_t __riscv_th_vlwu_v_u64m1_m (vbool64_t mask, const uint64_t *a, size_t vl);
vuint64m2_t __riscv_th_vlwu_v_u64m2_m (vbool32_t mask, const uint64_t *a, size_t vl);
vuint64m4_t __riscv_th_vlwu_v_u64m4_m (vbool16_t mask, const uint64_t *a, size_t vl);
vuint64m8_t __riscv_th_vlwu_v_u64m8_m (vbool8_t mask, const uint64_t *a, size_t vl);
XTheadVector Unit-Stride Store Intrinsics
void __riscv_th_vsb_v_i8m1 (int8_t *a, vint8m1_t b, size_t vl);
void __riscv_th_vsb_v_i8m2 (int8_t *a, vint8m2_t b, size_t vl);
void __riscv_th_vsb_v_i8m4 (int8_t *a, vint8m4_t b, size_t vl);
void __riscv_th_vsb_v_i8m8 (int8_t *a, vint8m8_t b, size_t vl);
void __riscv_th_vsb_v_i16m1 (int16_t *a, vint16m1_t b, size_t vl);
void __riscv_th_vsb_v_i16m2 (int16_t *a, vint16m2_t b, size_t vl);
void __riscv_th_vsb_v_i16m4 (int16_t *a, vint16m4_t b, size_t vl);
void __riscv_th_vsb_v_i16m8 (int16_t *a, vint16m8_t b, size_t vl);
void __riscv_th_vsb_v_i32m1 (int32_t *a, vint32m1_t b, size_t vl);
void __riscv_th_vsb_v_i32m2 (int32_t *a, vint32m2_t b, size_t vl);
void __riscv_th_vsb_v_i32m4 (int32_t *a, vint32m4_t b, size_t vl);
void __riscv_th_vsb_v_i32m8 (int32_t *a, vint32m8_t b, size_t vl);
void __riscv_th_vsb_v_u8m1 (uint8_t *a, vuint8m1_t b, size_t vl);
void __riscv_th_vsb_v_u8m2 (uint8_t *a, vuint8m2_t b, size_t vl);
void __riscv_th_vsb_v_u8m4 (uint8_t *a, vuint8m4_t b, size_t vl);
void __riscv_th_vsb_v_u8m8 (uint8_t *a, vuint8m8_t b, size_t vl);
void __riscv_th_vsb_v_u16m1 (uint16_t *a, vuint16m1_t b, size_t vl);
void __riscv_th_vsb_v_u16m2 (uint16_t *a, vuint16m2_t b, size_t vl);
void __riscv_th_vsb_v_u16m4 (uint16_t *a, vuint16m4_t b, size_t vl);
void __riscv_th_vsb_v_u16m8 (uint16_t *a, vuint16m8_t b, size_t vl);
void __riscv_th_vsb_v_u32m1 (uint32_t *a, vuint32m1_t b, size_t vl);
void __riscv_th_vsb_v_u32m2 (uint32_t *a, vuint32m2_t b, size_t vl);
void __riscv_th_vsb_v_u32m4 (uint32_t *a, vuint32m4_t b, size_t vl);
void __riscv_th_vsb_v_u32m8 (uint32_t *a, vuint32m8_t b, size_t vl);
void __riscv_th_vsh_v_i16m1 (int16_t *a, vint16m1_t b, size_t vl);
void __riscv_th_vsh_v_i16m2 (int16_t *a, vint16m2_t b, size_t vl);
void __riscv_th_vsh_v_i16m4 (int16_t *a, vint16m4_t b, size_t vl);
void __riscv_th_vsh_v_i16m8 (int16_t *a, vint16m8_t b, size_t vl);
void __riscv_th_vsh_v_i32m1 (int32_t *a, vint32m1_t b, size_t vl);
void __riscv_th_vsh_v_i32m2 (int32_t *a, vint32m2_t b, size_t vl);
void __riscv_th_vsh_v_i32m4 (int32_t *a, vint32m4_t b, size_t vl);
void __riscv_th_vsh_v_i32m8 (int32_t *a, vint32m8_t b, size_t vl);
void __riscv_th_vsh_v_u16m1 (uint16_t *a, vuint16m1_t b, size_t vl);
void __riscv_th_vsh_v_u16m2 (uint16_t *a, vuint16m2_t b, size_t vl);
void __riscv_th_vsh_v_u16m4 (uint16_t *a, vuint16m4_t b, size_t vl);
void __riscv_th_vsh_v_u16m8 (uint16_t *a, vuint16m8_t b, size_t vl);
void __riscv_th_vsh_v_u32m1 (uint32_t *a, vuint32m1_t b, size_t vl);
void __riscv_th_vsh_v_u32m2 (uint32_t *a, vuint32m2_t b, size_t vl);
void __riscv_th_vsh_v_u32m4 (uint32_t *a, vuint32m4_t b, size_t vl);
void __riscv_th_vsh_v_u32m8 (uint32_t *a, vuint32m8_t b, size_t vl);
void __riscv_th_vsw_v_i32m1 (int32_t *a, vint32m1_t b, size_t vl);
void __riscv_th_vsw_v_i32m2 (int32_t *a, vint32m2_t b, size_t vl);
void __riscv_th_vsw_v_i32m4 (int32_t *a, vint32m4_t b, size_t vl);
void __riscv_th_vsw_v_i32m8 (int32_t *a, vint32m8_t b, size_t vl);
void __riscv_th_vsw_v_u32m1 (uint32_t *a, vuint32m1_t b, size_t vl);
void __riscv_th_vsw_v_u32m2 (uint32_t *a, vuint32m2_t b, size_t vl);
void __riscv_th_vsw_v_u32m4 (uint32_t *a, vuint32m4_t b, size_t vl);
void __riscv_th_vsw_v_u32m8 (uint32_t *a, vuint32m8_t b, size_t vl);
// masked functions
void __riscv_th_vsb_v_i8m1_m (vbool8_t mask, int8_t *a, vint8m1_t b, size_t vl);
void __riscv_th_vsb_v_i8m2_m (vbool4_t mask, int8_t *a, vint8m2_t b, size_t vl);
void __riscv_th_vsb_v_i8m4_m (vbool2_t mask, int8_t *a, vint8m4_t b, size_t vl);
void __riscv_th_vsb_v_i8m8_m (vbool1_t mask, int8_t *a, vint8m8_t b, size_t vl);
void __riscv_th_vsb_v_i16m1_m (vbool16_t mask, int16_t *a, vint16m1_t b, size_t vl);
void __riscv_th_vsb_v_i16m2_m (vbool8_t mask, int16_t *a, vint16m2_t b, size_t vl);
void __riscv_th_vsb_v_i16m4_m (vbool4_t mask, int16_t *a, vint16m4_t b, size_t vl);
void __riscv_th_vsb_v_i16m8_m (vbool2_t mask, int16_t *a, vint16m8_t b, size_t vl);
void __riscv_th_vsb_v_i32m1_m (vbool32_t mask, int32_t *a, vint32m1_t b, size_t vl);
void __riscv_th_vsb_v_i32m2_m (vbool16_t mask, int32_t *a, vint32m2_t b, size_t vl);
void __riscv_th_vsb_v_i32m4_m (vbool8_t mask, int32_t *a, vint32m4_t b, size_t vl);
void __riscv_th_vsb_v_i32m8_m (vbool4_t mask, int32_t *a, vint32m8_t b, size_t vl);
void __riscv_th_vsb_v_u8m1_m (vbool8_t mask, uint8_t *a, vuint8m1_t b, size_t vl);
void __riscv_th_vsb_v_u8m2_m (vbool4_t mask, uint8_t *a, vuint8m2_t b, size_t vl);
void __riscv_th_vsb_v_u8m4_m (vbool2_t mask, uint8_t *a, vuint8m4_t b, size_t vl);
void __riscv_th_vsb_v_u8m8_m (vbool1_t mask, uint8_t *a, vuint8m8_t b, size_t vl);
void __riscv_th_vsb_v_u16m1_m (vbool16_t mask, uint16_t *a, vuint16m1_t b, size_t vl);
void __riscv_th_vsb_v_u16m2_m (vbool8_t mask, uint16_t *a, vuint16m2_t b, size_t vl);
void __riscv_th_vsb_v_u16m4_m (vbool4_t mask, uint16_t *a, vuint16m4_t b, size_t vl);
void __riscv_th_vsb_v_u16m8_m (vbool2_t mask, uint16_t *a, vuint16m8_t b, size_t vl);
void __riscv_th_vsb_v_u32m1_m (vbool32_t mask, uint32_t *a, vuint32m1_t b, size_t vl);
void __riscv_th_vsb_v_u32m2_m (vbool16_t mask, uint32_t *a, vuint32m2_t b, size_t vl);
void __riscv_th_vsb_v_u32m4_m (vbool8_t mask, uint32_t *a, vuint32m4_t b, size_t vl);
void __riscv_th_vsb_v_u32m8_m (vbool4_t mask, uint32_t *a, vuint32m8_t b, size_t vl);
void __riscv_th_vsh_v_i16m1_m (vbool16_t mask, int16_t *a, vint16m1_t b, size_t vl);
void __riscv_th_vsh_v_i16m2_m (vbool8_t mask, int16_t *a, vint16m2_t b, size_t vl);
void __riscv_th_vsh_v_i16m4_m (vbool4_t mask, int16_t *a, vint16m4_t b, size_t vl);
void __riscv_th_vsh_v_i16m8_m (vbool2_t mask, int16_t *a, vint16m8_t b, size_t vl);
void __riscv_th_vsh_v_i32m1_m (vbool32_t mask, int32_t *a, vint32m1_t b, size_t vl);
void __riscv_th_vsh_v_i32m2_m (vbool16_t mask, int32_t *a, vint32m2_t b, size_t vl);
void __riscv_th_vsh_v_i32m4_m (vbool8_t mask, int32_t *a, vint32m4_t b, size_t vl);
void __riscv_th_vsh_v_i32m8_m (vbool4_t mask, int32_t *a, vint32m8_t b, size_t vl);
void __riscv_th_vsh_v_u16m1_m (vbool16_t mask, uint16_t *a, vuint16m1_t b, size_t vl);
void __riscv_th_vsh_v_u16m2_m (vbool8_t mask, uint16_t *a, vuint16m2_t b, size_t vl);
void __riscv_th_vsh_v_u16m4_m (vbool4_t mask, uint16_t *a, vuint16m4_t b, size_t vl);
void __riscv_th_vsh_v_u16m8_m (vbool2_t mask, uint16_t *a, vuint16m8_t b, size_t vl);
void __riscv_th_vsh_v_u32m1_m (vbool32_t mask, uint32_t *a, vuint32m1_t b, size_t vl);
void __riscv_th_vsh_v_u32m2_m (vbool16_t mask, uint32_t *a, vuint32m2_t b, size_t vl);
void __riscv_th_vsh_v_u32m4_m (vbool8_t mask, uint32_t *a, vuint32m4_t b, size_t vl);
void __riscv_th_vsh_v_u32m8_m (vbool4_t mask, uint32_t *a, vuint32m8_t b, size_t vl);
void __riscv_th_vsw_v_i32m1_m (vbool32_t mask, int32_t *a, vint32m1_t b, size_t vl);
void __riscv_th_vsw_v_i32m2_m (vbool16_t mask, int32_t *a, vint32m2_t b, size_t vl);
void __riscv_th_vsw_v_i32m4_m (vbool8_t mask, int32_t *a, vint32m4_t b, size_t vl);
void __riscv_th_vsw_v_i32m8_m (vbool4_t mask, int32_t *a, vint32m8_t b, size_t vl);
void __riscv_th_vsw_v_u32m1_m (vbool32_t mask, uint32_t *a, vuint32m1_t b, size_t vl);
void __riscv_th_vsw_v_u32m2_m (vbool16_t mask, uint32_t *a, vuint32m2_t b, size_t vl);
void __riscv_th_vsw_v_u32m4_m (vbool8_t mask, uint32_t *a, vuint32m4_t b, size_t vl);
void __riscv_th_vsw_v_u32m8_m (vbool4_t mask, uint32_t *a, vuint32m8_t b, size_t vl);
XTheadVector Strided Load Intrinsics
vint8m1_t __riscv_th_vlsb_v_i8m1 (const int8_t *a, size_t stride, size_t vl);
vint8m2_t __riscv_th_vlsb_v_i8m2 (const int8_t *a, size_t stride, size_t vl);
vint8m4_t __riscv_th_vlsb_v_i8m4 (const int8_t *a, size_t stride, size_t vl);
vint8m8_t __riscv_th_vlsb_v_i8m8 (const int8_t *a, size_t stride, size_t vl);
vint16m1_t __riscv_th_vlsb_v_i16m1 (const int16_t *a, size_t stride, size_t vl);
vint16m2_t __riscv_th_vlsb_v_i16m2 (const int16_t *a, size_t stride, size_t vl);
vint16m4_t __riscv_th_vlsb_v_i16m4 (const int16_t *a, size_t stride, size_t vl);
vint16m8_t __riscv_th_vlsb_v_i16m8 (const int16_t *a, size_t stride, size_t vl);
vint32m1_t __riscv_th_vlsb_v_i32m1 (const int32_t *a, size_t stride, size_t vl);
vint32m2_t __riscv_th_vlsb_v_i32m2 (const int32_t *a, size_t stride, size_t vl);
vint32m4_t __riscv_th_vlsb_v_i32m4 (const int32_t *a, size_t stride, size_t vl);
vint32m8_t __riscv_th_vlsb_v_i32m8 (const int32_t *a, size_t stride, size_t vl);
vint64m1_t __riscv_th_vlsb_v_i64m1 (const int64_t *a, size_t stride, size_t vl);
vint64m2_t __riscv_th_vlsb_v_i64m2 (const int64_t *a, size_t stride, size_t vl);
vint64m4_t __riscv_th_vlsb_v_i64m4 (const int64_t *a, size_t stride, size_t vl);
vint64m8_t __riscv_th_vlsb_v_i64m8 (const int64_t *a, size_t stride, size_t vl);
vint16m1_t __riscv_th_vlsh_v_i16m1 (const int16_t *a, size_t stride, size_t vl);
vint16m2_t __riscv_th_vlsh_v_i16m2 (const int16_t *a, size_t stride, size_t vl);
vint16m4_t __riscv_th_vlsh_v_i16m4 (const int16_t *a, size_t stride, size_t vl);
vint16m8_t __riscv_th_vlsh_v_i16m8 (const int16_t *a, size_t stride, size_t vl);
vint32m1_t __riscv_th_vlsh_v_i32m1 (const int32_t *a, size_t stride, size_t vl);
vint32m2_t __riscv_th_vlsh_v_i32m2 (const int32_t *a, size_t stride, size_t vl);
vint32m4_t __riscv_th_vlsh_v_i32m4 (const int32_t *a, size_t stride, size_t vl);
vint32m8_t __riscv_th_vlsh_v_i32m8 (const int32_t *a, size_t stride, size_t vl);
vint64m1_t __riscv_th_vlsh_v_i64m1 (const int64_t *a, size_t stride, size_t vl);
vint64m2_t __riscv_th_vlsh_v_i64m2 (const int64_t *a, size_t stride, size_t vl);
vint64m4_t __riscv_th_vlsh_v_i64m4 (const int64_t *a, size_t stride, size_t vl);
vint64m8_t __riscv_th_vlsh_v_i64m8 (const int64_t *a, size_t stride, size_t vl);
vint32m1_t __riscv_th_vlsw_v_i32m1 (const int32_t *a, size_t stride, size_t vl);
vint32m2_t __riscv_th_vlsw_v_i32m2 (const int32_t *a, size_t stride, size_t vl);
vint32m4_t __riscv_th_vlsw_v_i32m4 (const int32_t *a, size_t stride, size_t vl);
vint32m8_t __riscv_th_vlsw_v_i32m8 (const int32_t *a, size_t stride, size_t vl);
vint64m1_t __riscv_th_vlsw_v_i64m1 (const int64_t *a, size_t stride, size_t vl);
vint64m2_t __riscv_th_vlsw_v_i64m2 (const int64_t *a, size_t stride, size_t vl);
vint64m4_t __riscv_th_vlsw_v_i64m4 (const int64_t *a, size_t stride, size_t vl);
vint64m8_t __riscv_th_vlsw_v_i64m8 (const int64_t *a, size_t stride, size_t vl);
vuint8m1_t __riscv_th_vlsbu_v_u8m1 (const uint8_t *a, size_t stride, size_t vl);
vuint8m2_t __riscv_th_vlsbu_v_u8m2 (const uint8_t *a, size_t stride, size_t vl);
vuint8m4_t __riscv_th_vlsbu_v_u8m4 (const uint8_t *a, size_t stride, size_t vl);
vuint8m8_t __riscv_th_vlsbu_v_u8m8 (const uint8_t *a, size_t stride, size_t vl);
vuint16m1_t __riscv_th_vlsbu_v_u16m1 (const uint16_t *a, size_t stride, size_t vl);
vuint16m2_t __riscv_th_vlsbu_v_u16m2 (const uint16_t *a, size_t stride, size_t vl);
vuint16m4_t __riscv_th_vlsbu_v_u16m4 (const uint16_t *a, size_t stride, size_t vl);
vuint16m8_t __riscv_th_vlsbu_v_u16m8 (const uint16_t *a, size_t stride, size_t vl);
vuint32m1_t __riscv_th_vlsbu_v_u32m1 (const uint32_t *a, size_t stride, size_t vl);
vuint32m2_t __riscv_th_vlsbu_v_u32m2 (const uint32_t *a, size_t stride, size_t vl);
vuint32m4_t __riscv_th_vlsbu_v_u32m4 (const uint32_t *a, size_t stride, size_t vl);
vuint32m8_t __riscv_th_vlsbu_v_u32m8 (const uint32_t *a, size_t stride, size_t vl);
vuint64m1_t __riscv_th_vlsbu_v_u64m1 (const uint64_t *a, size_t stride, size_t vl);
vuint64m2_t __riscv_th_vlsbu_v_u64m2 (const uint64_t *a, size_t stride, size_t vl);
vuint64m4_t __riscv_th_vlsbu_v_u64m4 (const uint64_t *a, size_t stride, size_t vl);
vuint64m8_t __riscv_th_vlsbu_v_u64m8 (const uint64_t *a, size_t stride, size_t vl);
vuint16m1_t __riscv_th_vlshu_v_u16m1 (const uint16_t *a, size_t stride, size_t vl);
vuint16m2_t __riscv_th_vlshu_v_u16m2 (const uint16_t *a, size_t stride, size_t vl);
vuint16m4_t __riscv_th_vlshu_v_u16m4 (const uint16_t *a, size_t stride, size_t vl);
vuint16m8_t __riscv_th_vlshu_v_u16m8 (const uint16_t *a, size_t stride, size_t vl);
vuint32m1_t __riscv_th_vlshu_v_u32m1 (const uint32_t *a, size_t stride, size_t vl);
vuint32m2_t __riscv_th_vlshu_v_u32m2 (const uint32_t *a, size_t stride, size_t vl);
vuint32m4_t __riscv_th_vlshu_v_u32m4 (const uint32_t *a, size_t stride, size_t vl);
vuint32m8_t __riscv_th_vlshu_v_u32m8 (const uint32_t *a, size_t stride, size_t vl);
vuint64m1_t __riscv_th_vlshu_v_u64m1 (const uint64_t *a, size_t stride, size_t vl);
vuint64m2_t __riscv_th_vlshu_v_u64m2 (const uint64_t *a, size_t stride, size_t vl);
vuint64m4_t __riscv_th_vlshu_v_u64m4 (const uint64_t *a, size_t stride, size_t vl);
vuint64m8_t __riscv_th_vlshu_v_u64m8 (const uint64_t *a, size_t stride, size_t vl);
vuint32m1_t __riscv_th_vlswu_v_u32m1 (const uint32_t *a, size_t stride, size_t vl);
vuint32m2_t __riscv_th_vlswu_v_u32m2 (const uint32_t *a, size_t stride, size_t vl);
vuint32m4_t __riscv_th_vlswu_v_u32m4 (const uint32_t *a, size_t stride, size_t vl);
vuint32m8_t __riscv_th_vlswu_v_u32m8 (const uint32_t *a, size_t stride, size_t vl);
vuint64m1_t __riscv_th_vlswu_v_u64m1 (const uint64_t *a, size_t stride, size_t vl);
vuint64m2_t __riscv_th_vlswu_v_u64m2 (const uint64_t *a, size_t stride, size_t vl);
vuint64m4_t __riscv_th_vlswu_v_u64m4 (const uint64_t *a, size_t stride, size_t vl);
vuint64m8_t __riscv_th_vlswu_v_u64m8 (const uint64_t *a, size_t stride, size_t vl);
// masked functions
vint8m1_t __riscv_th_vlsb_v_i8m1_m (vbool8_t mask, const int8_t *a, size_t stride, size_t vl);
vint8m2_t __riscv_th_vlsb_v_i8m2_m (vbool4_t mask, const int8_t *a, size_t stride, size_t vl);
vint8m4_t __riscv_th_vlsb_v_i8m4_m (vbool2_t mask, const int8_t *a, size_t stride, size_t vl);
vint8m8_t __riscv_th_vlsb_v_i8m8_m (vbool1_t mask, const int8_t *a, size_t stride, size_t vl);
vint16m1_t __riscv_th_vlsb_v_i16m1_m (vbool16_t mask, const int16_t *a, size_t stride, size_t vl);
vint16m2_t __riscv_th_vlsb_v_i16m2_m (vbool8_t mask, const int16_t *a, size_t stride, size_t vl);
vint16m4_t __riscv_th_vlsb_v_i16m4_m (vbool4_t mask, const int16_t *a, size_t stride, size_t vl);
vint16m8_t __riscv_th_vlsb_v_i16m8_m (vbool2_t mask, const int16_t *a, size_t stride, size_t vl);
vint32m1_t __riscv_th_vlsb_v_i32m1_m (vbool32_t mask, const int32_t *a, size_t stride, size_t vl);
vint32m2_t __riscv_th_vlsb_v_i32m2_m (vbool16_t mask, const int32_t *a, size_t stride, size_t vl);
vint32m4_t __riscv_th_vlsb_v_i32m4_m (vbool8_t mask, const int32_t *a, size_t stride, size_t vl);
vint32m8_t __riscv_th_vlsb_v_i32m8_m (vbool4_t mask, const int32_t *a, size_t stride, size_t vl);
vint64m1_t __riscv_th_vlsb_v_i64m1_m (vbool64_t mask, const int64_t *a, size_t stride, size_t vl);
vint64m2_t __riscv_th_vlsb_v_i64m2_m (vbool32_t mask, const int64_t *a, size_t stride, size_t vl);
vint64m4_t __riscv_th_vlsb_v_i64m4_m (vbool16_t mask, const int64_t *a, size_t stride, size_t vl);
vint64m8_t __riscv_th_vlsb_v_i64m8_m (vbool8_t mask, const int64_t *a, size_t stride, size_t vl);
vint16m1_t __riscv_th_vlsh_v_i16m1_m (vbool16_t mask, const int16_t *a, size_t stride, size_t vl);
vint16m2_t __riscv_th_vlsh_v_i16m2_m (vbool8_t mask, const int16_t *a, size_t stride, size_t vl);
vint16m4_t __riscv_th_vlsh_v_i16m4_m (vbool4_t mask, const int16_t *a, size_t stride, size_t vl);
vint16m8_t __riscv_th_vlsh_v_i16m8_m (vbool2_t mask, const int16_t *a, size_t stride, size_t vl);
vint32m1_t __riscv_th_vlsh_v_i32m1_m (vbool32_t mask, const int32_t *a, size_t stride, size_t vl);
vint32m2_t __riscv_th_vlsh_v_i32m2_m (vbool16_t mask, const int32_t *a, size_t stride, size_t vl);
vint32m4_t __riscv_th_vlsh_v_i32m4_m (vbool8_t mask, const int32_t *a, size_t stride, size_t vl);
vint32m8_t __riscv_th_vlsh_v_i32m8_m (vbool4_t mask, const int32_t *a, size_t stride, size_t vl);
vint64m1_t __riscv_th_vlsh_v_i64m1_m (vbool64_t mask, const int64_t *a, size_t stride, size_t vl);
vint64m2_t __riscv_th_vlsh_v_i64m2_m (vbool32_t mask, const int64_t *a, size_t stride, size_t vl);
vint64m4_t __riscv_th_vlsh_v_i64m4_m (vbool16_t mask, const int64_t *a, size_t stride, size_t vl);
vint64m8_t __riscv_th_vlsh_v_i64m8_m (vbool8_t mask, const int64_t *a, size_t stride, size_t vl);
vint32m1_t __riscv_th_vlsw_v_i32m1_m (vbool32_t mask, const int32_t *a, size_t stride, size_t vl);
vint32m2_t __riscv_th_vlsw_v_i32m2_m (vbool16_t mask, const int32_t *a, size_t stride, size_t vl);
vint32m4_t __riscv_th_vlsw_v_i32m4_m (vbool8_t mask, const int32_t *a, size_t stride, size_t vl);
vint32m8_t __riscv_th_vlsw_v_i32m8_m (vbool4_t mask, const int32_t *a, size_t stride, size_t vl);
vint64m1_t __riscv_th_vlsw_v_i64m1_m (vbool64_t mask, const int64_t *a, size_t stride, size_t vl);
vint64m2_t __riscv_th_vlsw_v_i64m2_m (vbool32_t mask, const int64_t *a, size_t stride, size_t vl);
vint64m4_t __riscv_th_vlsw_v_i64m4_m (vbool16_t mask, const int64_t *a, size_t stride, size_t vl);
vint64m8_t __riscv_th_vlsw_v_i64m8_m (vbool8_t mask, const int64_t *a, size_t stride, size_t vl);
vuint8m1_t __riscv_th_vlsbu_v_u8m1_m (vbool8_t mask, const uint8_t *a, size_t stride, size_t vl);
vuint8m2_t __riscv_th_vlsbu_v_u8m2_m (vbool4_t mask, const uint8_t *a, size_t stride, size_t vl);
vuint8m4_t __riscv_th_vlsbu_v_u8m4_m (vbool2_t mask, const uint8_t *a, size_t stride, size_t vl);
vuint8m8_t __riscv_th_vlsbu_v_u8m8_m (vbool1_t mask, const uint8_t *a, size_t stride, size_t vl);
vuint16m1_t __riscv_th_vlsbu_v_u16m1_m (vbool16_t mask, const uint16_t *a, size_t stride, size_t vl);
vuint16m2_t __riscv_th_vlsbu_v_u16m2_m (vbool8_t mask, const uint16_t *a, size_t stride, size_t vl);
vuint16m4_t __riscv_th_vlsbu_v_u16m4_m (vbool4_t mask, const uint16_t *a, size_t stride, size_t vl);
vuint16m8_t __riscv_th_vlsbu_v_u16m8_m (vbool2_t mask, const uint16_t *a, size_t stride, size_t vl);
vuint32m1_t __riscv_th_vlsbu_v_u32m1_m (vbool32_t mask, const uint32_t *a, size_t stride, size_t vl);
vuint32m2_t __riscv_th_vlsbu_v_u32m2_m (vbool16_t mask, const uint32_t *a, size_t stride, size_t vl);
vuint32m4_t __riscv_th_vlsbu_v_u32m4_m (vbool8_t mask, const uint32_t *a, size_t stride, size_t vl);
vuint32m8_t __riscv_th_vlsbu_v_u32m8_m (vbool4_t mask, const uint32_t *a, size_t stride, size_t vl);
vuint64m1_t __riscv_th_vlsbu_v_u64m1_m (vbool64_t mask, const uint64_t *a, size_t stride, size_t vl);
vuint64m2_t __riscv_th_vlsbu_v_u64m2_m (vbool32_t mask, const uint64_t *a, size_t stride, size_t vl);
vuint64m4_t __riscv_th_vlsbu_v_u64m4_m (vbool16_t mask, const uint64_t *a, size_t stride, size_t vl);
vuint64m8_t __riscv_th_vlsbu_v_u64m8_m (vbool8_t mask, const uint64_t *a, size_t stride, size_t vl);
vuint16m1_t __riscv_th_vlshu_v_u16m1_m (vbool16_t mask, const uint16_t *a, size_t stride, size_t vl);
vuint16m2_t __riscv_th_vlshu_v_u16m2_m (vbool8_t mask, const uint16_t *a, size_t stride, size_t vl);
vuint16m4_t __riscv_th_vlshu_v_u16m4_m (vbool4_t mask, const uint16_t *a, size_t stride, size_t vl);
vuint16m8_t __riscv_th_vlshu_v_u16m8_m (vbool2_t mask, const uint16_t *a, size_t stride, size_t vl);
vuint32m1_t __riscv_th_vlshu_v_u32m1_m (vbool32_t mask, const uint32_t *a, size_t stride, size_t vl);
vuint32m2_t __riscv_th_vlshu_v_u32m2_m (vbool16_t mask, const uint32_t *a, size_t stride, size_t vl);
vuint32m4_t __riscv_th_vlshu_v_u32m4_m (vbool8_t mask, const uint32_t *a, size_t stride, size_t vl);
vuint32m8_t __riscv_th_vlshu_v_u32m8_m (vbool4_t mask, const uint32_t *a, size_t stride, size_t vl);
vuint64m1_t __riscv_th_vlshu_v_u64m1_m (vbool64_t mask, const uint64_t *a, size_t stride, size_t vl);
vuint64m2_t __riscv_th_vlshu_v_u64m2_m (vbool32_t mask, const uint64_t *a, size_t stride, size_t vl);
vuint64m4_t __riscv_th_vlshu_v_u64m4_m (vbool16_t mask, const uint64_t *a, size_t stride, size_t vl);
vuint64m8_t __riscv_th_vlshu_v_u64m8_m (vbool8_t mask, const uint64_t *a, size_t stride, size_t vl);
vuint32m1_t __riscv_th_vlswu_v_u32m1_m (vbool32_t mask, const uint32_t *a, size_t stride, size_t vl);
vuint32m2_t __riscv_th_vlswu_v_u32m2_m (vbool16_t mask, const uint32_t *a, size_t stride, size_t vl);
vuint32m4_t __riscv_th_vlswu_v_u32m4_m (vbool8_t mask, const uint32_t *a, size_t stride, size_t vl);
vuint32m8_t __riscv_th_vlswu_v_u32m8_m (vbool4_t mask, const uint32_t *a, size_t stride, size_t vl);
vuint64m1_t __riscv_th_vlswu_v_u64m1_m (vbool64_t mask, const uint64_t *a, size_t stride, size_t vl);
vuint64m2_t __riscv_th_vlswu_v_u64m2_m (vbool32_t mask, const uint64_t *a, size_t stride, size_t vl);
vuint64m4_t __riscv_th_vlswu_v_u64m4_m (vbool16_t mask, const uint64_t *a, size_t stride, size_t vl);
vuint64m8_t __riscv_th_vlswu_v_u64m8_m (vbool8_t mask, const uint64_t *a, size_t stride, size_t vl);
XTheadVector Strided Store Intrinsics
// Unmasked vssb variants. Arguments are (base pointer a, stride, value vector b, vl);
// nothing is returned. The pointer's element type matches the vector's element type.
// NOTE(review): the unit of `stride` (bytes vs. elements) is not stated in this
// listing - confirm against the ISA spec.
// Signed element types i8/i16/i32, each at LMUL m1, m2, m4, m8:
void __riscv_th_vssb_v_i8m1 (int8_t *a, size_t stride, vint8m1_t b, size_t vl);
void __riscv_th_vssb_v_i8m2 (int8_t *a, size_t stride, vint8m2_t b, size_t vl);
void __riscv_th_vssb_v_i8m4 (int8_t *a, size_t stride, vint8m4_t b, size_t vl);
void __riscv_th_vssb_v_i8m8 (int8_t *a, size_t stride, vint8m8_t b, size_t vl);
void __riscv_th_vssb_v_i16m1 (int16_t *a, size_t stride, vint16m1_t b, size_t vl);
void __riscv_th_vssb_v_i16m2 (int16_t *a, size_t stride, vint16m2_t b, size_t vl);
void __riscv_th_vssb_v_i16m4 (int16_t *a, size_t stride, vint16m4_t b, size_t vl);
void __riscv_th_vssb_v_i16m8 (int16_t *a, size_t stride, vint16m8_t b, size_t vl);
void __riscv_th_vssb_v_i32m1 (int32_t *a, size_t stride, vint32m1_t b, size_t vl);
void __riscv_th_vssb_v_i32m2 (int32_t *a, size_t stride, vint32m2_t b, size_t vl);
void __riscv_th_vssb_v_i32m4 (int32_t *a, size_t stride, vint32m4_t b, size_t vl);
void __riscv_th_vssb_v_i32m8 (int32_t *a, size_t stride, vint32m8_t b, size_t vl);
// Unsigned element types u8/u16/u32, each at LMUL m1, m2, m4, m8:
void __riscv_th_vssb_v_u8m1 (uint8_t *a, size_t stride, vuint8m1_t b, size_t vl);
void __riscv_th_vssb_v_u8m2 (uint8_t *a, size_t stride, vuint8m2_t b, size_t vl);
void __riscv_th_vssb_v_u8m4 (uint8_t *a, size_t stride, vuint8m4_t b, size_t vl);
void __riscv_th_vssb_v_u8m8 (uint8_t *a, size_t stride, vuint8m8_t b, size_t vl);
void __riscv_th_vssb_v_u16m1 (uint16_t *a, size_t stride, vuint16m1_t b, size_t vl);
void __riscv_th_vssb_v_u16m2 (uint16_t *a, size_t stride, vuint16m2_t b, size_t vl);
void __riscv_th_vssb_v_u16m4 (uint16_t *a, size_t stride, vuint16m4_t b, size_t vl);
void __riscv_th_vssb_v_u16m8 (uint16_t *a, size_t stride, vuint16m8_t b, size_t vl);
void __riscv_th_vssb_v_u32m1 (uint32_t *a, size_t stride, vuint32m1_t b, size_t vl);
void __riscv_th_vssb_v_u32m2 (uint32_t *a, size_t stride, vuint32m2_t b, size_t vl);
void __riscv_th_vssb_v_u32m4 (uint32_t *a, size_t stride, vuint32m4_t b, size_t vl);
void __riscv_th_vssb_v_u32m8 (uint32_t *a, size_t stride, vuint32m8_t b, size_t vl);
// Unmasked vssh variants. Arguments are (base pointer a, stride, value vector b, vl);
// nothing is returned. Listed for 16- and 32-bit element types only.
// Signed element types i16/i32, each at LMUL m1, m2, m4, m8:
void __riscv_th_vssh_v_i16m1 (int16_t *a, size_t stride, vint16m1_t b, size_t vl);
void __riscv_th_vssh_v_i16m2 (int16_t *a, size_t stride, vint16m2_t b, size_t vl);
void __riscv_th_vssh_v_i16m4 (int16_t *a, size_t stride, vint16m4_t b, size_t vl);
void __riscv_th_vssh_v_i16m8 (int16_t *a, size_t stride, vint16m8_t b, size_t vl);
void __riscv_th_vssh_v_i32m1 (int32_t *a, size_t stride, vint32m1_t b, size_t vl);
void __riscv_th_vssh_v_i32m2 (int32_t *a, size_t stride, vint32m2_t b, size_t vl);
void __riscv_th_vssh_v_i32m4 (int32_t *a, size_t stride, vint32m4_t b, size_t vl);
void __riscv_th_vssh_v_i32m8 (int32_t *a, size_t stride, vint32m8_t b, size_t vl);
// Unsigned element types u16/u32, each at LMUL m1, m2, m4, m8:
void __riscv_th_vssh_v_u16m1 (uint16_t *a, size_t stride, vuint16m1_t b, size_t vl);
void __riscv_th_vssh_v_u16m2 (uint16_t *a, size_t stride, vuint16m2_t b, size_t vl);
void __riscv_th_vssh_v_u16m4 (uint16_t *a, size_t stride, vuint16m4_t b, size_t vl);
void __riscv_th_vssh_v_u16m8 (uint16_t *a, size_t stride, vuint16m8_t b, size_t vl);
void __riscv_th_vssh_v_u32m1 (uint32_t *a, size_t stride, vuint32m1_t b, size_t vl);
void __riscv_th_vssh_v_u32m2 (uint32_t *a, size_t stride, vuint32m2_t b, size_t vl);
void __riscv_th_vssh_v_u32m4 (uint32_t *a, size_t stride, vuint32m4_t b, size_t vl);
void __riscv_th_vssh_v_u32m8 (uint32_t *a, size_t stride, vuint32m8_t b, size_t vl);
// Unmasked vssw variants. Arguments are (base pointer a, stride, value vector b, vl);
// nothing is returned. Listed for i32 and u32 element types at LMUL m1, m2, m4, m8.
// NOTE(review): only 32-bit element variants are listed here, whereas the masked
// vlswu strided loads earlier in this document also have u64 variants - confirm
// whether i64/u64 store variants are intentionally absent.
void __riscv_th_vssw_v_i32m1 (int32_t *a, size_t stride, vint32m1_t b, size_t vl);
void __riscv_th_vssw_v_i32m2 (int32_t *a, size_t stride, vint32m2_t b, size_t vl);
void __riscv_th_vssw_v_i32m4 (int32_t *a, size_t stride, vint32m4_t b, size_t vl);
void __riscv_th_vssw_v_i32m8 (int32_t *a, size_t stride, vint32m8_t b, size_t vl);
void __riscv_th_vssw_v_u32m1 (uint32_t *a, size_t stride, vuint32m1_t b, size_t vl);
void __riscv_th_vssw_v_u32m2 (uint32_t *a, size_t stride, vuint32m2_t b, size_t vl);
void __riscv_th_vssw_v_u32m4 (uint32_t *a, size_t stride, vuint32m4_t b, size_t vl);
void __riscv_th_vssw_v_u32m8 (uint32_t *a, size_t stride, vuint32m8_t b, size_t vl);
// masked functions (`_m` suffix): each takes a leading `mask` argument
// Masked vssb variants. Arguments are (mask, base pointer a, stride, value vector b, vl).
// The mask type is vbool<N>_t with N = element bits / LMUL
// (e.g. i8m1 -> vbool8_t, i8m8 -> vbool1_t, i32m8 -> vbool4_t).
// Signed element types i8/i16/i32, each at LMUL m1, m2, m4, m8:
void __riscv_th_vssb_v_i8m1_m (vbool8_t mask, int8_t *a, size_t stride, vint8m1_t b, size_t vl);
void __riscv_th_vssb_v_i8m2_m (vbool4_t mask, int8_t *a, size_t stride, vint8m2_t b, size_t vl);
void __riscv_th_vssb_v_i8m4_m (vbool2_t mask, int8_t *a, size_t stride, vint8m4_t b, size_t vl);
void __riscv_th_vssb_v_i8m8_m (vbool1_t mask, int8_t *a, size_t stride, vint8m8_t b, size_t vl);
void __riscv_th_vssb_v_i16m1_m (vbool16_t mask, int16_t *a, size_t stride, vint16m1_t b, size_t vl);
void __riscv_th_vssb_v_i16m2_m (vbool8_t mask, int16_t *a, size_t stride, vint16m2_t b, size_t vl);
void __riscv_th_vssb_v_i16m4_m (vbool4_t mask, int16_t *a, size_t stride, vint16m4_t b, size_t vl);
void __riscv_th_vssb_v_i16m8_m (vbool2_t mask, int16_t *a, size_t stride, vint16m8_t b, size_t vl);
void __riscv_th_vssb_v_i32m1_m (vbool32_t mask, int32_t *a, size_t stride, vint32m1_t b, size_t vl);
void __riscv_th_vssb_v_i32m2_m (vbool16_t mask, int32_t *a, size_t stride, vint32m2_t b, size_t vl);
void __riscv_th_vssb_v_i32m4_m (vbool8_t mask, int32_t *a, size_t stride, vint32m4_t b, size_t vl);
void __riscv_th_vssb_v_i32m8_m (vbool4_t mask, int32_t *a, size_t stride, vint32m8_t b, size_t vl);
// Unsigned element types u8/u16/u32, each at LMUL m1, m2, m4, m8:
void __riscv_th_vssb_v_u8m1_m (vbool8_t mask, uint8_t *a, size_t stride, vuint8m1_t b, size_t vl);
void __riscv_th_vssb_v_u8m2_m (vbool4_t mask, uint8_t *a, size_t stride, vuint8m2_t b, size_t vl);
void __riscv_th_vssb_v_u8m4_m (vbool2_t mask, uint8_t *a, size_t stride, vuint8m4_t b, size_t vl);
void __riscv_th_vssb_v_u8m8_m (vbool1_t mask, uint8_t *a, size_t stride, vuint8m8_t b, size_t vl);
void __riscv_th_vssb_v_u16m1_m (vbool16_t mask, uint16_t *a, size_t stride, vuint16m1_t b, size_t vl);
void __riscv_th_vssb_v_u16m2_m (vbool8_t mask, uint16_t *a, size_t stride, vuint16m2_t b, size_t vl);
void __riscv_th_vssb_v_u16m4_m (vbool4_t mask, uint16_t *a, size_t stride, vuint16m4_t b, size_t vl);
void __riscv_th_vssb_v_u16m8_m (vbool2_t mask, uint16_t *a, size_t stride, vuint16m8_t b, size_t vl);
void __riscv_th_vssb_v_u32m1_m (vbool32_t mask, uint32_t *a, size_t stride, vuint32m1_t b, size_t vl);
void __riscv_th_vssb_v_u32m2_m (vbool16_t mask, uint32_t *a, size_t stride, vuint32m2_t b, size_t vl);
void __riscv_th_vssb_v_u32m4_m (vbool8_t mask, uint32_t *a, size_t stride, vuint32m4_t b, size_t vl);
void __riscv_th_vssb_v_u32m8_m (vbool4_t mask, uint32_t *a, size_t stride, vuint32m8_t b, size_t vl);
// Masked vssh variants. Arguments are (mask, base pointer a, stride, value vector b, vl).
// The mask type is vbool<N>_t with N = element bits / LMUL
// (e.g. i16m1 -> vbool16_t, i32m8 -> vbool4_t).
// Signed element types i16/i32, each at LMUL m1, m2, m4, m8:
void __riscv_th_vssh_v_i16m1_m (vbool16_t mask, int16_t *a, size_t stride, vint16m1_t b, size_t vl);
void __riscv_th_vssh_v_i16m2_m (vbool8_t mask, int16_t *a, size_t stride, vint16m2_t b, size_t vl);
void __riscv_th_vssh_v_i16m4_m (vbool4_t mask, int16_t *a, size_t stride, vint16m4_t b, size_t vl);
void __riscv_th_vssh_v_i16m8_m (vbool2_t mask, int16_t *a, size_t stride, vint16m8_t b, size_t vl);
void __riscv_th_vssh_v_i32m1_m (vbool32_t mask, int32_t *a, size_t stride, vint32m1_t b, size_t vl);
void __riscv_th_vssh_v_i32m2_m (vbool16_t mask, int32_t *a, size_t stride, vint32m2_t b, size_t vl);
void __riscv_th_vssh_v_i32m4_m (vbool8_t mask, int32_t *a, size_t stride, vint32m4_t b, size_t vl);
void __riscv_th_vssh_v_i32m8_m (vbool4_t mask, int32_t *a, size_t stride, vint32m8_t b, size_t vl);
// Unsigned element types u16/u32, each at LMUL m1, m2, m4, m8:
void __riscv_th_vssh_v_u16m1_m (vbool16_t mask, uint16_t *a, size_t stride, vuint16m1_t b, size_t vl);
void __riscv_th_vssh_v_u16m2_m (vbool8_t mask, uint16_t *a, size_t stride, vuint16m2_t b, size_t vl);
void __riscv_th_vssh_v_u16m4_m (vbool4_t mask, uint16_t *a, size_t stride, vuint16m4_t b, size_t vl);
void __riscv_th_vssh_v_u16m8_m (vbool2_t mask, uint16_t *a, size_t stride, vuint16m8_t b, size_t vl);
void __riscv_th_vssh_v_u32m1_m (vbool32_t mask, uint32_t *a, size_t stride, vuint32m1_t b, size_t vl);
void __riscv_th_vssh_v_u32m2_m (vbool16_t mask, uint32_t *a, size_t stride, vuint32m2_t b, size_t vl);
void __riscv_th_vssh_v_u32m4_m (vbool8_t mask, uint32_t *a, size_t stride, vuint32m4_t b, size_t vl);
void __riscv_th_vssh_v_u32m8_m (vbool4_t mask, uint32_t *a, size_t stride, vuint32m8_t b, size_t vl);
// Masked vssw variants. Arguments are (mask, base pointer a, stride, value vector b, vl).
// Listed for i32 and u32 element types at LMUL m1, m2, m4, m8; the mask type is
// vbool<N>_t with N = 32 / LMUL (m1 -> vbool32_t ... m8 -> vbool4_t).
void __riscv_th_vssw_v_i32m1_m (vbool32_t mask, int32_t *a, size_t stride, vint32m1_t b, size_t vl);
void __riscv_th_vssw_v_i32m2_m (vbool16_t mask, int32_t *a, size_t stride, vint32m2_t b, size_t vl);
void __riscv_th_vssw_v_i32m4_m (vbool8_t mask, int32_t *a, size_t stride, vint32m4_t b, size_t vl);
void __riscv_th_vssw_v_i32m8_m (vbool4_t mask, int32_t *a, size_t stride, vint32m8_t b, size_t vl);
void __riscv_th_vssw_v_u32m1_m (vbool32_t mask, uint32_t *a, size_t stride, vuint32m1_t b, size_t vl);
void __riscv_th_vssw_v_u32m2_m (vbool16_t mask, uint32_t *a, size_t stride, vuint32m2_t b, size_t vl);
void __riscv_th_vssw_v_u32m4_m (vbool8_t mask, uint32_t *a, size_t stride, vuint32m4_t b, size_t vl);
void __riscv_th_vssw_v_u32m8_m (vbool4_t mask, uint32_t *a, size_t stride, vuint32m8_t b, size_t vl);
XTheadVector Indexed Load Intrinsics
// Unmasked vlxb variants: results are i8/i16/i32/i64 vectors at LMUL m1, m2, m4, m8.
// Arguments are (base pointer a, index vector `indexed`, vl). The index vector is
// an unsigned vector with the same element width and LMUL as the result.
// NOTE(review): the unit of the index values (byte offsets vs. element indices)
// is not stated in this listing - confirm against the ISA spec.
vint8m1_t __riscv_th_vlxb_v_i8m1 (const int8_t *a, vuint8m1_t indexed, size_t vl);
vint8m2_t __riscv_th_vlxb_v_i8m2 (const int8_t *a, vuint8m2_t indexed, size_t vl);
vint8m4_t __riscv_th_vlxb_v_i8m4 (const int8_t *a, vuint8m4_t indexed, size_t vl);
vint8m8_t __riscv_th_vlxb_v_i8m8 (const int8_t *a, vuint8m8_t indexed, size_t vl);
vint16m1_t __riscv_th_vlxb_v_i16m1 (const int16_t *a, vuint16m1_t indexed, size_t vl);
vint16m2_t __riscv_th_vlxb_v_i16m2 (const int16_t *a, vuint16m2_t indexed, size_t vl);
vint16m4_t __riscv_th_vlxb_v_i16m4 (const int16_t *a, vuint16m4_t indexed, size_t vl);
vint16m8_t __riscv_th_vlxb_v_i16m8 (const int16_t *a, vuint16m8_t indexed, size_t vl);
vint32m1_t __riscv_th_vlxb_v_i32m1 (const int32_t *a, vuint32m1_t indexed, size_t vl);
vint32m2_t __riscv_th_vlxb_v_i32m2 (const int32_t *a, vuint32m2_t indexed, size_t vl);
vint32m4_t __riscv_th_vlxb_v_i32m4 (const int32_t *a, vuint32m4_t indexed, size_t vl);
vint32m8_t __riscv_th_vlxb_v_i32m8 (const int32_t *a, vuint32m8_t indexed, size_t vl);
vint64m1_t __riscv_th_vlxb_v_i64m1 (const int64_t *a, vuint64m1_t indexed, size_t vl);
vint64m2_t __riscv_th_vlxb_v_i64m2 (const int64_t *a, vuint64m2_t indexed, size_t vl);
vint64m4_t __riscv_th_vlxb_v_i64m4 (const int64_t *a, vuint64m4_t indexed, size_t vl);
vint64m8_t __riscv_th_vlxb_v_i64m8 (const int64_t *a, vuint64m8_t indexed, size_t vl);
// Unmasked vlxh variants: results are i16/i32/i64 vectors at LMUL m1, m2, m4, m8.
// Arguments are (base pointer a, index vector `indexed`, vl). The index vector is
// an unsigned vector with the same element width and LMUL as the result.
vint16m1_t __riscv_th_vlxh_v_i16m1 (const int16_t *a, vuint16m1_t indexed, size_t vl);
vint16m2_t __riscv_th_vlxh_v_i16m2 (const int16_t *a, vuint16m2_t indexed, size_t vl);
vint16m4_t __riscv_th_vlxh_v_i16m4 (const int16_t *a, vuint16m4_t indexed, size_t vl);
vint16m8_t __riscv_th_vlxh_v_i16m8 (const int16_t *a, vuint16m8_t indexed, size_t vl);
vint32m1_t __riscv_th_vlxh_v_i32m1 (const int32_t *a, vuint32m1_t indexed, size_t vl);
vint32m2_t __riscv_th_vlxh_v_i32m2 (const int32_t *a, vuint32m2_t indexed, size_t vl);
vint32m4_t __riscv_th_vlxh_v_i32m4 (const int32_t *a, vuint32m4_t indexed, size_t vl);
vint32m8_t __riscv_th_vlxh_v_i32m8 (const int32_t *a, vuint32m8_t indexed, size_t vl);
vint64m1_t __riscv_th_vlxh_v_i64m1 (const int64_t *a, vuint64m1_t indexed, size_t vl);
vint64m2_t __riscv_th_vlxh_v_i64m2 (const int64_t *a, vuint64m2_t indexed, size_t vl);
vint64m4_t __riscv_th_vlxh_v_i64m4 (const int64_t *a, vuint64m4_t indexed, size_t vl);
vint64m8_t __riscv_th_vlxh_v_i64m8 (const int64_t *a, vuint64m8_t indexed, size_t vl);
// Unmasked vlxw variants: results are i32/i64 vectors at LMUL m1, m2, m4, m8.
// Arguments are (base pointer a, index vector `indexed`, vl). The index vector is
// an unsigned vector with the same element width and LMUL as the result.
vint32m1_t __riscv_th_vlxw_v_i32m1 (const int32_t *a, vuint32m1_t indexed, size_t vl);
vint32m2_t __riscv_th_vlxw_v_i32m2 (const int32_t *a, vuint32m2_t indexed, size_t vl);
vint32m4_t __riscv_th_vlxw_v_i32m4 (const int32_t *a, vuint32m4_t indexed, size_t vl);
vint32m8_t __riscv_th_vlxw_v_i32m8 (const int32_t *a, vuint32m8_t indexed, size_t vl);
vint64m1_t __riscv_th_vlxw_v_i64m1 (const int64_t *a, vuint64m1_t indexed, size_t vl);
vint64m2_t __riscv_th_vlxw_v_i64m2 (const int64_t *a, vuint64m2_t indexed, size_t vl);
vint64m4_t __riscv_th_vlxw_v_i64m4 (const int64_t *a, vuint64m4_t indexed, size_t vl);
vint64m8_t __riscv_th_vlxw_v_i64m8 (const int64_t *a, vuint64m8_t indexed, size_t vl);
// Unmasked vlxbu variants: results are u8/u16/u32/u64 vectors at LMUL m1, m2, m4, m8.
// Arguments are (base pointer a, index vector `indexed`, vl). The index vector has
// the same type as the result vector.
vuint8m1_t __riscv_th_vlxbu_v_u8m1 (const uint8_t *a, vuint8m1_t indexed, size_t vl);
vuint8m2_t __riscv_th_vlxbu_v_u8m2 (const uint8_t *a, vuint8m2_t indexed, size_t vl);
vuint8m4_t __riscv_th_vlxbu_v_u8m4 (const uint8_t *a, vuint8m4_t indexed, size_t vl);
vuint8m8_t __riscv_th_vlxbu_v_u8m8 (const uint8_t *a, vuint8m8_t indexed, size_t vl);
vuint16m1_t __riscv_th_vlxbu_v_u16m1 (const uint16_t *a, vuint16m1_t indexed, size_t vl);
vuint16m2_t __riscv_th_vlxbu_v_u16m2 (const uint16_t *a, vuint16m2_t indexed, size_t vl);
vuint16m4_t __riscv_th_vlxbu_v_u16m4 (const uint16_t *a, vuint16m4_t indexed, size_t vl);
vuint16m8_t __riscv_th_vlxbu_v_u16m8 (const uint16_t *a, vuint16m8_t indexed, size_t vl);
vuint32m1_t __riscv_th_vlxbu_v_u32m1 (const uint32_t *a, vuint32m1_t indexed, size_t vl);
vuint32m2_t __riscv_th_vlxbu_v_u32m2 (const uint32_t *a, vuint32m2_t indexed, size_t vl);
vuint32m4_t __riscv_th_vlxbu_v_u32m4 (const uint32_t *a, vuint32m4_t indexed, size_t vl);
vuint32m8_t __riscv_th_vlxbu_v_u32m8 (const uint32_t *a, vuint32m8_t indexed, size_t vl);
vuint64m1_t __riscv_th_vlxbu_v_u64m1 (const uint64_t *a, vuint64m1_t indexed, size_t vl);
vuint64m2_t __riscv_th_vlxbu_v_u64m2 (const uint64_t *a, vuint64m2_t indexed, size_t vl);
vuint64m4_t __riscv_th_vlxbu_v_u64m4 (const uint64_t *a, vuint64m4_t indexed, size_t vl);
vuint64m8_t __riscv_th_vlxbu_v_u64m8 (const uint64_t *a, vuint64m8_t indexed, size_t vl);
// Unmasked vlxhu variants: results are u16/u32/u64 vectors at LMUL m1, m2, m4, m8.
// Arguments are (base pointer a, index vector `indexed`, vl). The index vector has
// the same type as the result vector.
vuint16m1_t __riscv_th_vlxhu_v_u16m1 (const uint16_t *a, vuint16m1_t indexed, size_t vl);
vuint16m2_t __riscv_th_vlxhu_v_u16m2 (const uint16_t *a, vuint16m2_t indexed, size_t vl);
vuint16m4_t __riscv_th_vlxhu_v_u16m4 (const uint16_t *a, vuint16m4_t indexed, size_t vl);
vuint16m8_t __riscv_th_vlxhu_v_u16m8 (const uint16_t *a, vuint16m8_t indexed, size_t vl);
vuint32m1_t __riscv_th_vlxhu_v_u32m1 (const uint32_t *a, vuint32m1_t indexed, size_t vl);
vuint32m2_t __riscv_th_vlxhu_v_u32m2 (const uint32_t *a, vuint32m2_t indexed, size_t vl);
vuint32m4_t __riscv_th_vlxhu_v_u32m4 (const uint32_t *a, vuint32m4_t indexed, size_t vl);
vuint32m8_t __riscv_th_vlxhu_v_u32m8 (const uint32_t *a, vuint32m8_t indexed, size_t vl);
vuint64m1_t __riscv_th_vlxhu_v_u64m1 (const uint64_t *a, vuint64m1_t indexed, size_t vl);
vuint64m2_t __riscv_th_vlxhu_v_u64m2 (const uint64_t *a, vuint64m2_t indexed, size_t vl);
vuint64m4_t __riscv_th_vlxhu_v_u64m4 (const uint64_t *a, vuint64m4_t indexed, size_t vl);
vuint64m8_t __riscv_th_vlxhu_v_u64m8 (const uint64_t *a, vuint64m8_t indexed, size_t vl);
// Unmasked vlxwu variants: results are u32/u64 vectors at LMUL m1, m2, m4, m8.
// Arguments are (base pointer a, index vector `indexed`, vl). The index vector has
// the same type as the result vector.
vuint32m1_t __riscv_th_vlxwu_v_u32m1 (const uint32_t *a, vuint32m1_t indexed, size_t vl);
vuint32m2_t __riscv_th_vlxwu_v_u32m2 (const uint32_t *a, vuint32m2_t indexed, size_t vl);
vuint32m4_t __riscv_th_vlxwu_v_u32m4 (const uint32_t *a, vuint32m4_t indexed, size_t vl);
vuint32m8_t __riscv_th_vlxwu_v_u32m8 (const uint32_t *a, vuint32m8_t indexed, size_t vl);
vuint64m1_t __riscv_th_vlxwu_v_u64m1 (const uint64_t *a, vuint64m1_t indexed, size_t vl);
vuint64m2_t __riscv_th_vlxwu_v_u64m2 (const uint64_t *a, vuint64m2_t indexed, size_t vl);
vuint64m4_t __riscv_th_vlxwu_v_u64m4 (const uint64_t *a, vuint64m4_t indexed, size_t vl);
vuint64m8_t __riscv_th_vlxwu_v_u64m8 (const uint64_t *a, vuint64m8_t indexed, size_t vl);
// masked functions (`_m` suffix): each takes a leading `mask` argument
// Masked vlxb variants: results are i8/i16/i32/i64 vectors at LMUL m1, m2, m4, m8.
// Arguments are (mask, base pointer a, index vector `indexed`, vl); the mask type
// is vbool<N>_t with N = element bits / LMUL (e.g. i8m8 -> vbool1_t, i64m1 -> vbool64_t).
// NOTE(review): no passthrough operand appears in these signatures, so the value
// of masked-off result elements is not stated by this listing - confirm.
vint8m1_t __riscv_th_vlxb_v_i8m1_m (vbool8_t mask, const int8_t *a, vuint8m1_t indexed, size_t vl);
vint8m2_t __riscv_th_vlxb_v_i8m2_m (vbool4_t mask, const int8_t *a, vuint8m2_t indexed, size_t vl);
vint8m4_t __riscv_th_vlxb_v_i8m4_m (vbool2_t mask, const int8_t *a, vuint8m4_t indexed, size_t vl);
vint8m8_t __riscv_th_vlxb_v_i8m8_m (vbool1_t mask, const int8_t *a, vuint8m8_t indexed, size_t vl);
vint16m1_t __riscv_th_vlxb_v_i16m1_m (vbool16_t mask, const int16_t *a, vuint16m1_t indexed, size_t vl);
vint16m2_t __riscv_th_vlxb_v_i16m2_m (vbool8_t mask, const int16_t *a, vuint16m2_t indexed, size_t vl);
vint16m4_t __riscv_th_vlxb_v_i16m4_m (vbool4_t mask, const int16_t *a, vuint16m4_t indexed, size_t vl);
vint16m8_t __riscv_th_vlxb_v_i16m8_m (vbool2_t mask, const int16_t *a, vuint16m8_t indexed, size_t vl);
vint32m1_t __riscv_th_vlxb_v_i32m1_m (vbool32_t mask, const int32_t *a, vuint32m1_t indexed, size_t vl);
vint32m2_t __riscv_th_vlxb_v_i32m2_m (vbool16_t mask, const int32_t *a, vuint32m2_t indexed, size_t vl);
vint32m4_t __riscv_th_vlxb_v_i32m4_m (vbool8_t mask, const int32_t *a, vuint32m4_t indexed, size_t vl);
vint32m8_t __riscv_th_vlxb_v_i32m8_m (vbool4_t mask, const int32_t *a, vuint32m8_t indexed, size_t vl);
vint64m1_t __riscv_th_vlxb_v_i64m1_m (vbool64_t mask, const int64_t *a, vuint64m1_t indexed, size_t vl);
vint64m2_t __riscv_th_vlxb_v_i64m2_m (vbool32_t mask, const int64_t *a, vuint64m2_t indexed, size_t vl);
vint64m4_t __riscv_th_vlxb_v_i64m4_m (vbool16_t mask, const int64_t *a, vuint64m4_t indexed, size_t vl);
vint64m8_t __riscv_th_vlxb_v_i64m8_m (vbool8_t mask, const int64_t *a, vuint64m8_t indexed, size_t vl);
// Masked vlxh variants: results are i16/i32/i64 vectors at LMUL m1, m2, m4, m8.
// Arguments are (mask, base pointer a, index vector `indexed`, vl); the mask type
// is vbool<N>_t with N = element bits / LMUL (e.g. i16m1 -> vbool16_t, i64m8 -> vbool8_t).
vint16m1_t __riscv_th_vlxh_v_i16m1_m (vbool16_t mask, const int16_t *a, vuint16m1_t indexed, size_t vl);
vint16m2_t __riscv_th_vlxh_v_i16m2_m (vbool8_t mask, const int16_t *a, vuint16m2_t indexed, size_t vl);
vint16m4_t __riscv_th_vlxh_v_i16m4_m (vbool4_t mask, const int16_t *a, vuint16m4_t indexed, size_t vl);
vint16m8_t __riscv_th_vlxh_v_i16m8_m (vbool2_t mask, const int16_t *a, vuint16m8_t indexed, size_t vl);
vint32m1_t __riscv_th_vlxh_v_i32m1_m (vbool32_t mask, const int32_t *a, vuint32m1_t indexed, size_t vl);
vint32m2_t __riscv_th_vlxh_v_i32m2_m (vbool16_t mask, const int32_t *a, vuint32m2_t indexed, size_t vl);
vint32m4_t __riscv_th_vlxh_v_i32m4_m (vbool8_t mask, const int32_t *a, vuint32m4_t indexed, size_t vl);
vint32m8_t __riscv_th_vlxh_v_i32m8_m (vbool4_t mask, const int32_t *a, vuint32m8_t indexed, size_t vl);
vint64m1_t __riscv_th_vlxh_v_i64m1_m (vbool64_t mask, const int64_t *a, vuint64m1_t indexed, size_t vl);
vint64m2_t __riscv_th_vlxh_v_i64m2_m (vbool32_t mask, const int64_t *a, vuint64m2_t indexed, size_t vl);
vint64m4_t __riscv_th_vlxh_v_i64m4_m (vbool16_t mask, const int64_t *a, vuint64m4_t indexed, size_t vl);
vint64m8_t __riscv_th_vlxh_v_i64m8_m (vbool8_t mask, const int64_t *a, vuint64m8_t indexed, size_t vl);
// Masked vlxw variants: results are i32/i64 vectors at LMUL m1, m2, m4, m8.
// Arguments are (mask, base pointer a, index vector `indexed`, vl); the mask type
// is vbool<N>_t with N = element bits / LMUL (e.g. i32m1 -> vbool32_t, i64m8 -> vbool8_t).
vint32m1_t __riscv_th_vlxw_v_i32m1_m (vbool32_t mask, const int32_t *a, vuint32m1_t indexed, size_t vl);
vint32m2_t __riscv_th_vlxw_v_i32m2_m (vbool16_t mask, const int32_t *a, vuint32m2_t indexed, size_t vl);
vint32m4_t __riscv_th_vlxw_v_i32m4_m (vbool8_t mask, const int32_t *a, vuint32m4_t indexed, size_t vl);
vint32m8_t __riscv_th_vlxw_v_i32m8_m (vbool4_t mask, const int32_t *a, vuint32m8_t indexed, size_t vl);
vint64m1_t __riscv_th_vlxw_v_i64m1_m (vbool64_t mask, const int64_t *a, vuint64m1_t indexed, size_t vl);
vint64m2_t __riscv_th_vlxw_v_i64m2_m (vbool32_t mask, const int64_t *a, vuint64m2_t indexed, size_t vl);
vint64m4_t __riscv_th_vlxw_v_i64m4_m (vbool16_t mask, const int64_t *a, vuint64m4_t indexed, size_t vl);
vint64m8_t __riscv_th_vlxw_v_i64m8_m (vbool8_t mask, const int64_t *a, vuint64m8_t indexed, size_t vl);
// Masked vlxbu variants: results are u8/u16/u32/u64 vectors at LMUL m1, m2, m4, m8.
// Arguments are (mask, base pointer a, index vector `indexed`, vl); the mask type
// is vbool<N>_t with N = element bits / LMUL (e.g. u8m8 -> vbool1_t, u64m1 -> vbool64_t).
vuint8m1_t __riscv_th_vlxbu_v_u8m1_m (vbool8_t mask, const uint8_t *a, vuint8m1_t indexed, size_t vl);
vuint8m2_t __riscv_th_vlxbu_v_u8m2_m (vbool4_t mask, const uint8_t *a, vuint8m2_t indexed, size_t vl);
vuint8m4_t __riscv_th_vlxbu_v_u8m4_m (vbool2_t mask, const uint8_t *a, vuint8m4_t indexed, size_t vl);
vuint8m8_t __riscv_th_vlxbu_v_u8m8_m (vbool1_t mask, const uint8_t *a, vuint8m8_t indexed, size_t vl);
vuint16m1_t __riscv_th_vlxbu_v_u16m1_m (vbool16_t mask, const uint16_t *a, vuint16m1_t indexed, size_t vl);
vuint16m2_t __riscv_th_vlxbu_v_u16m2_m (vbool8_t mask, const uint16_t *a, vuint16m2_t indexed, size_t vl);
vuint16m4_t __riscv_th_vlxbu_v_u16m4_m (vbool4_t mask, const uint16_t *a, vuint16m4_t indexed, size_t vl);
vuint16m8_t __riscv_th_vlxbu_v_u16m8_m (vbool2_t mask, const uint16_t *a, vuint16m8_t indexed, size_t vl);
vuint32m1_t __riscv_th_vlxbu_v_u32m1_m (vbool32_t mask, const uint32_t *a, vuint32m1_t indexed, size_t vl);
vuint32m2_t __riscv_th_vlxbu_v_u32m2_m (vbool16_t mask, const uint32_t *a, vuint32m2_t indexed, size_t vl);
vuint32m4_t __riscv_th_vlxbu_v_u32m4_m (vbool8_t mask, const uint32_t *a, vuint32m4_t indexed, size_t vl);
vuint32m8_t __riscv_th_vlxbu_v_u32m8_m (vbool4_t mask, const uint32_t *a, vuint32m8_t indexed, size_t vl);
vuint64m1_t __riscv_th_vlxbu_v_u64m1_m (vbool64_t mask, const uint64_t *a, vuint64m1_t indexed, size_t vl);
vuint64m2_t __riscv_th_vlxbu_v_u64m2_m (vbool32_t mask, const uint64_t *a, vuint64m2_t indexed, size_t vl);
vuint64m4_t __riscv_th_vlxbu_v_u64m4_m (vbool16_t mask, const uint64_t *a, vuint64m4_t indexed, size_t vl);
vuint64m8_t __riscv_th_vlxbu_v_u64m8_m (vbool8_t mask, const uint64_t *a, vuint64m8_t indexed, size_t vl);
// Masked vlxhu variants: results are u16/u32/u64 vectors at LMUL m1, m2, m4, m8.
// Arguments are (mask, base pointer a, index vector `indexed`, vl); the mask type
// is vbool<N>_t with N = element bits / LMUL (e.g. u16m1 -> vbool16_t, u64m8 -> vbool8_t).
vuint16m1_t __riscv_th_vlxhu_v_u16m1_m (vbool16_t mask, const uint16_t *a, vuint16m1_t indexed, size_t vl);
vuint16m2_t __riscv_th_vlxhu_v_u16m2_m (vbool8_t mask, const uint16_t *a, vuint16m2_t indexed, size_t vl);
vuint16m4_t __riscv_th_vlxhu_v_u16m4_m (vbool4_t mask, const uint16_t *a, vuint16m4_t indexed, size_t vl);
vuint16m8_t __riscv_th_vlxhu_v_u16m8_m (vbool2_t mask, const uint16_t *a, vuint16m8_t indexed, size_t vl);
vuint32m1_t __riscv_th_vlxhu_v_u32m1_m (vbool32_t mask, const uint32_t *a, vuint32m1_t indexed, size_t vl);
vuint32m2_t __riscv_th_vlxhu_v_u32m2_m (vbool16_t mask, const uint32_t *a, vuint32m2_t indexed, size_t vl);
vuint32m4_t __riscv_th_vlxhu_v_u32m4_m (vbool8_t mask, const uint32_t *a, vuint32m4_t indexed, size_t vl);
vuint32m8_t __riscv_th_vlxhu_v_u32m8_m (vbool4_t mask, const uint32_t *a, vuint32m8_t indexed, size_t vl);
vuint64m1_t __riscv_th_vlxhu_v_u64m1_m (vbool64_t mask, const uint64_t *a, vuint64m1_t indexed, size_t vl);
vuint64m2_t __riscv_th_vlxhu_v_u64m2_m (vbool32_t mask, const uint64_t *a, vuint64m2_t indexed, size_t vl);
vuint64m4_t __riscv_th_vlxhu_v_u64m4_m (vbool16_t mask, const uint64_t *a, vuint64m4_t indexed, size_t vl);
vuint64m8_t __riscv_th_vlxhu_v_u64m8_m (vbool8_t mask, const uint64_t *a, vuint64m8_t indexed, size_t vl);
// Masked vlxwu variants: results are u32/u64 vectors at LMUL m1, m2, m4, m8.
// Arguments are (mask, base pointer a, index vector `indexed`, vl); the mask type
// is vbool<N>_t with N = element bits / LMUL (e.g. u32m1 -> vbool32_t, u64m8 -> vbool8_t).
vuint32m1_t __riscv_th_vlxwu_v_u32m1_m (vbool32_t mask, const uint32_t *a, vuint32m1_t indexed, size_t vl);
vuint32m2_t __riscv_th_vlxwu_v_u32m2_m (vbool16_t mask, const uint32_t *a, vuint32m2_t indexed, size_t vl);
vuint32m4_t __riscv_th_vlxwu_v_u32m4_m (vbool8_t mask, const uint32_t *a, vuint32m4_t indexed, size_t vl);
vuint32m8_t __riscv_th_vlxwu_v_u32m8_m (vbool4_t mask, const uint32_t *a, vuint32m8_t indexed, size_t vl);
vuint64m1_t __riscv_th_vlxwu_v_u64m1_m (vbool64_t mask, const uint64_t *a, vuint64m1_t indexed, size_t vl);
vuint64m2_t __riscv_th_vlxwu_v_u64m2_m (vbool32_t mask, const uint64_t *a, vuint64m2_t indexed, size_t vl);
vuint64m4_t __riscv_th_vlxwu_v_u64m4_m (vbool16_t mask, const uint64_t *a, vuint64m4_t indexed, size_t vl);
vuint64m8_t __riscv_th_vlxwu_v_u64m8_m (vbool8_t mask, const uint64_t *a, vuint64m8_t indexed, size_t vl);
XTheadVector Indexed Store Intrinsics
// Unmasked vsxb variants, signed element types i8/i16/i32 at LMUL m1, m2, m4, m8.
// Arguments are (base pointer a, index vector `indexed`, value vector `value`, vl);
// nothing is returned. The index vector is an unsigned vector with the same
// element width and LMUL as `value`.
void __riscv_th_vsxb_v_i8m1 (int8_t *a, vuint8m1_t indexed, vint8m1_t value, size_t vl);
void __riscv_th_vsxb_v_i8m2 (int8_t *a, vuint8m2_t indexed, vint8m2_t value, size_t vl);
void __riscv_th_vsxb_v_i8m4 (int8_t *a, vuint8m4_t indexed, vint8m4_t value, size_t vl);
void __riscv_th_vsxb_v_i8m8 (int8_t *a, vuint8m8_t indexed, vint8m8_t value, size_t vl);
void __riscv_th_vsxb_v_i16m1 (int16_t *a, vuint16m1_t indexed, vint16m1_t value, size_t vl);
void __riscv_th_vsxb_v_i16m2 (int16_t *a, vuint16m2_t indexed, vint16m2_t value, size_t vl);
void __riscv_th_vsxb_v_i16m4 (int16_t *a, vuint16m4_t indexed, vint16m4_t value, size_t vl);
void __riscv_th_vsxb_v_i16m8 (int16_t *a, vuint16m8_t indexed, vint16m8_t value, size_t vl);
void __riscv_th_vsxb_v_i32m1 (int32_t *a, vuint32m1_t indexed, vint32m1_t value, size_t vl);
void __riscv_th_vsxb_v_i32m2 (int32_t *a, vuint32m2_t indexed, vint32m2_t value, size_t vl);
void __riscv_th_vsxb_v_i32m4 (int32_t *a, vuint32m4_t indexed, vint32m4_t value, size_t vl);
void __riscv_th_vsxb_v_i32m8 (int32_t *a, vuint32m8_t indexed, vint32m8_t value, size_t vl);
// Unmasked vsuxb variants, signed element types i8/i16/i32 at LMUL m1, m2, m4, m8.
// Signatures are identical to the corresponding vsxb variants:
// (base pointer a, index vector `indexed`, value vector `value`, vl), returning nothing.
// NOTE(review): presumably the unordered counterpart of vsxb (by analogy with
// RVV naming) - this listing does not say; confirm against the ISA spec.
void __riscv_th_vsuxb_v_i8m1 (int8_t *a, vuint8m1_t indexed, vint8m1_t value, size_t vl);
void __riscv_th_vsuxb_v_i8m2 (int8_t *a, vuint8m2_t indexed, vint8m2_t value, size_t vl);
void __riscv_th_vsuxb_v_i8m4 (int8_t *a, vuint8m4_t indexed, vint8m4_t value, size_t vl);
void __riscv_th_vsuxb_v_i8m8 (int8_t *a, vuint8m8_t indexed, vint8m8_t value, size_t vl);
void __riscv_th_vsuxb_v_i16m1 (int16_t *a, vuint16m1_t indexed, vint16m1_t value, size_t vl);
void __riscv_th_vsuxb_v_i16m2 (int16_t *a, vuint16m2_t indexed, vint16m2_t value, size_t vl);
void __riscv_th_vsuxb_v_i16m4 (int16_t *a, vuint16m4_t indexed, vint16m4_t value, size_t vl);
void __riscv_th_vsuxb_v_i16m8 (int16_t *a, vuint16m8_t indexed, vint16m8_t value, size_t vl);
void __riscv_th_vsuxb_v_i32m1 (int32_t *a, vuint32m1_t indexed, vint32m1_t value, size_t vl);
void __riscv_th_vsuxb_v_i32m2 (int32_t *a, vuint32m2_t indexed, vint32m2_t value, size_t vl);
void __riscv_th_vsuxb_v_i32m4 (int32_t *a, vuint32m4_t indexed, vint32m4_t value, size_t vl);
void __riscv_th_vsuxb_v_i32m8 (int32_t *a, vuint32m8_t indexed, vint32m8_t value, size_t vl);
// Unmasked vsxb variants, unsigned element types u8/u16/u32 at LMUL m1, m2, m4, m8.
// Arguments are (base pointer a, index vector `indexed`, value vector `value`, vl);
// nothing is returned. The index vector has the same type as `value`.
void __riscv_th_vsxb_v_u8m1 (uint8_t *a, vuint8m1_t indexed, vuint8m1_t value, size_t vl);
void __riscv_th_vsxb_v_u8m2 (uint8_t *a, vuint8m2_t indexed, vuint8m2_t value, size_t vl);
void __riscv_th_vsxb_v_u8m4 (uint8_t *a, vuint8m4_t indexed, vuint8m4_t value, size_t vl);
void __riscv_th_vsxb_v_u8m8 (uint8_t *a, vuint8m8_t indexed, vuint8m8_t value, size_t vl);
void __riscv_th_vsxb_v_u16m1 (uint16_t *a, vuint16m1_t indexed, vuint16m1_t value, size_t vl);
void __riscv_th_vsxb_v_u16m2 (uint16_t *a, vuint16m2_t indexed, vuint16m2_t value, size_t vl);
void __riscv_th_vsxb_v_u16m4 (uint16_t *a, vuint16m4_t indexed, vuint16m4_t value, size_t vl);
void __riscv_th_vsxb_v_u16m8 (uint16_t *a, vuint16m8_t indexed, vuint16m8_t value, size_t vl);
void __riscv_th_vsxb_v_u32m1 (uint32_t *a, vuint32m1_t indexed, vuint32m1_t value, size_t vl);
void __riscv_th_vsxb_v_u32m2 (uint32_t *a, vuint32m2_t indexed, vuint32m2_t value, size_t vl);
void __riscv_th_vsxb_v_u32m4 (uint32_t *a, vuint32m4_t indexed, vuint32m4_t value, size_t vl);
void __riscv_th_vsxb_v_u32m8 (uint32_t *a, vuint32m8_t indexed, vuint32m8_t value, size_t vl);
// Unmasked vsuxb variants, unsigned element types u8/u16/u32 at LMUL m1, m2, m4, m8.
// Signatures are identical to the corresponding vsxb variants:
// (base pointer a, index vector `indexed`, value vector `value`, vl), returning nothing.
void __riscv_th_vsuxb_v_u8m1 (uint8_t *a, vuint8m1_t indexed, vuint8m1_t value, size_t vl);
void __riscv_th_vsuxb_v_u8m2 (uint8_t *a, vuint8m2_t indexed, vuint8m2_t value, size_t vl);
void __riscv_th_vsuxb_v_u8m4 (uint8_t *a, vuint8m4_t indexed, vuint8m4_t value, size_t vl);
void __riscv_th_vsuxb_v_u8m8 (uint8_t *a, vuint8m8_t indexed, vuint8m8_t value, size_t vl);
void __riscv_th_vsuxb_v_u16m1 (uint16_t *a, vuint16m1_t indexed, vuint16m1_t value, size_t vl);
void __riscv_th_vsuxb_v_u16m2 (uint16_t *a, vuint16m2_t indexed, vuint16m2_t value, size_t vl);
void __riscv_th_vsuxb_v_u16m4 (uint16_t *a, vuint16m4_t indexed, vuint16m4_t value, size_t vl);
void __riscv_th_vsuxb_v_u16m8 (uint16_t *a, vuint16m8_t indexed, vuint16m8_t value, size_t vl);
void __riscv_th_vsuxb_v_u32m1 (uint32_t *a, vuint32m1_t indexed, vuint32m1_t value, size_t vl);
void __riscv_th_vsuxb_v_u32m2 (uint32_t *a, vuint32m2_t indexed, vuint32m2_t value, size_t vl);
void __riscv_th_vsuxb_v_u32m4 (uint32_t *a, vuint32m4_t indexed, vuint32m4_t value, size_t vl);
void __riscv_th_vsuxb_v_u32m8 (uint32_t *a, vuint32m8_t indexed, vuint32m8_t value, size_t vl);
// NOTE(review): the unmasked indexed-store list ends here with only vsxb/vsuxb,
// while the masked list that follows also declares vsxh/vsuxh variants - confirm
// whether the unmasked vsxh/vsuxh (and wider) prototypes were omitted by mistake.
// masked functions (`_m` suffix): each takes a leading `mask` argument
// Masked vsxb variants, signed element types i8/i16/i32 at LMUL m1, m2, m4, m8.
// Arguments are (mask, base pointer a, index vector `indexed`, value vector `value`, vl).
// The mask type is vbool<N>_t with N = element bits / LMUL
// (e.g. i8m1 -> vbool8_t, i8m8 -> vbool1_t, i32m8 -> vbool4_t).
void __riscv_th_vsxb_v_i8m1_m (vbool8_t mask, int8_t *a, vuint8m1_t indexed, vint8m1_t value, size_t vl);
void __riscv_th_vsxb_v_i8m2_m (vbool4_t mask, int8_t *a, vuint8m2_t indexed, vint8m2_t value, size_t vl);
void __riscv_th_vsxb_v_i8m4_m (vbool2_t mask, int8_t *a, vuint8m4_t indexed, vint8m4_t value, size_t vl);
void __riscv_th_vsxb_v_i8m8_m (vbool1_t mask, int8_t *a, vuint8m8_t indexed, vint8m8_t value, size_t vl);
void __riscv_th_vsxb_v_i16m1_m (vbool16_t mask, int16_t *a, vuint16m1_t indexed, vint16m1_t value, size_t vl);
void __riscv_th_vsxb_v_i16m2_m (vbool8_t mask, int16_t *a, vuint16m2_t indexed, vint16m2_t value, size_t vl);
void __riscv_th_vsxb_v_i16m4_m (vbool4_t mask, int16_t *a, vuint16m4_t indexed, vint16m4_t value, size_t vl);
void __riscv_th_vsxb_v_i16m8_m (vbool2_t mask, int16_t *a, vuint16m8_t indexed, vint16m8_t value, size_t vl);
void __riscv_th_vsxb_v_i32m1_m (vbool32_t mask, int32_t *a, vuint32m1_t indexed, vint32m1_t value, size_t vl);
void __riscv_th_vsxb_v_i32m2_m (vbool16_t mask, int32_t *a, vuint32m2_t indexed, vint32m2_t value, size_t vl);
void __riscv_th_vsxb_v_i32m4_m (vbool8_t mask, int32_t *a, vuint32m4_t indexed, vint32m4_t value, size_t vl);
void __riscv_th_vsxb_v_i32m8_m (vbool4_t mask, int32_t *a, vuint32m8_t indexed, vint32m8_t value, size_t vl);
// Masked vsuxb variants, signed element types i8/i16/i32 at LMUL m1, m2, m4, m8.
// Signatures are identical to the corresponding masked vsxb variants:
// (mask, base pointer a, index vector `indexed`, value vector `value`, vl).
// The mask type is vbool<N>_t with N = element bits / LMUL.
void __riscv_th_vsuxb_v_i8m1_m (vbool8_t mask, int8_t *a, vuint8m1_t indexed, vint8m1_t value, size_t vl);
void __riscv_th_vsuxb_v_i8m2_m (vbool4_t mask, int8_t *a, vuint8m2_t indexed, vint8m2_t value, size_t vl);
void __riscv_th_vsuxb_v_i8m4_m (vbool2_t mask, int8_t *a, vuint8m4_t indexed, vint8m4_t value, size_t vl);
void __riscv_th_vsuxb_v_i8m8_m (vbool1_t mask, int8_t *a, vuint8m8_t indexed, vint8m8_t value, size_t vl);
void __riscv_th_vsuxb_v_i16m1_m (vbool16_t mask, int16_t *a, vuint16m1_t indexed, vint16m1_t value, size_t vl);
void __riscv_th_vsuxb_v_i16m2_m (vbool8_t mask, int16_t *a, vuint16m2_t indexed, vint16m2_t value, size_t vl);
void __riscv_th_vsuxb_v_i16m4_m (vbool4_t mask, int16_t *a, vuint16m4_t indexed, vint16m4_t value, size_t vl);
void __riscv_th_vsuxb_v_i16m8_m (vbool2_t mask, int16_t *a, vuint16m8_t indexed, vint16m8_t value, size_t vl);
void __riscv_th_vsuxb_v_i32m1_m (vbool32_t mask, int32_t *a, vuint32m1_t indexed, vint32m1_t value, size_t vl);
void __riscv_th_vsuxb_v_i32m2_m (vbool16_t mask, int32_t *a, vuint32m2_t indexed, vint32m2_t value, size_t vl);
void __riscv_th_vsuxb_v_i32m4_m (vbool8_t mask, int32_t *a, vuint32m4_t indexed, vint32m4_t value, size_t vl);
void __riscv_th_vsuxb_v_i32m8_m (vbool4_t mask, int32_t *a, vuint32m8_t indexed, vint32m8_t value, size_t vl);
// Masked vsxb variants, unsigned element types u8/u16/u32 at LMUL m1, m2, m4, m8.
// Arguments are (mask, base pointer a, index vector `indexed`, value vector `value`, vl).
// The mask type is vbool<N>_t with N = element bits / LMUL
// (e.g. u8m1 -> vbool8_t, u8m8 -> vbool1_t, u32m8 -> vbool4_t).
void __riscv_th_vsxb_v_u8m1_m (vbool8_t mask, uint8_t *a, vuint8m1_t indexed, vuint8m1_t value, size_t vl);
void __riscv_th_vsxb_v_u8m2_m (vbool4_t mask, uint8_t *a, vuint8m2_t indexed, vuint8m2_t value, size_t vl);
void __riscv_th_vsxb_v_u8m4_m (vbool2_t mask, uint8_t *a, vuint8m4_t indexed, vuint8m4_t value, size_t vl);
void __riscv_th_vsxb_v_u8m8_m (vbool1_t mask, uint8_t *a, vuint8m8_t indexed, vuint8m8_t value, size_t vl);
void __riscv_th_vsxb_v_u16m1_m (vbool16_t mask, uint16_t *a, vuint16m1_t indexed, vuint16m1_t value, size_t vl);
void __riscv_th_vsxb_v_u16m2_m (vbool8_t mask, uint16_t *a, vuint16m2_t indexed, vuint16m2_t value, size_t vl);
void __riscv_th_vsxb_v_u16m4_m (vbool4_t mask, uint16_t *a, vuint16m4_t indexed, vuint16m4_t value, size_t vl);
void __riscv_th_vsxb_v_u16m8_m (vbool2_t mask, uint16_t *a, vuint16m8_t indexed, vuint16m8_t value, size_t vl);
void __riscv_th_vsxb_v_u32m1_m (vbool32_t mask, uint32_t *a, vuint32m1_t indexed, vuint32m1_t value, size_t vl);
void __riscv_th_vsxb_v_u32m2_m (vbool16_t mask, uint32_t *a, vuint32m2_t indexed, vuint32m2_t value, size_t vl);
void __riscv_th_vsxb_v_u32m4_m (vbool8_t mask, uint32_t *a, vuint32m4_t indexed, vuint32m4_t value, size_t vl);
void __riscv_th_vsxb_v_u32m8_m (vbool4_t mask, uint32_t *a, vuint32m8_t indexed, vuint32m8_t value, size_t vl);
// Masked vsuxb variants, unsigned element types u8/u16/u32 at LMUL m1, m2, m4, m8.
// Signatures are identical to the corresponding masked vsxb variants:
// (mask, base pointer a, index vector `indexed`, value vector `value`, vl).
// The mask type is vbool<N>_t with N = element bits / LMUL.
void __riscv_th_vsuxb_v_u8m1_m (vbool8_t mask, uint8_t *a, vuint8m1_t indexed, vuint8m1_t value, size_t vl);
void __riscv_th_vsuxb_v_u8m2_m (vbool4_t mask, uint8_t *a, vuint8m2_t indexed, vuint8m2_t value, size_t vl);
void __riscv_th_vsuxb_v_u8m4_m (vbool2_t mask, uint8_t *a, vuint8m4_t indexed, vuint8m4_t value, size_t vl);
void __riscv_th_vsuxb_v_u8m8_m (vbool1_t mask, uint8_t *a, vuint8m8_t indexed, vuint8m8_t value, size_t vl);
void __riscv_th_vsuxb_v_u16m1_m (vbool16_t mask, uint16_t *a, vuint16m1_t indexed, vuint16m1_t value, size_t vl);
void __riscv_th_vsuxb_v_u16m2_m (vbool8_t mask, uint16_t *a, vuint16m2_t indexed, vuint16m2_t value, size_t vl);
void __riscv_th_vsuxb_v_u16m4_m (vbool4_t mask, uint16_t *a, vuint16m4_t indexed, vuint16m4_t value, size_t vl);
void __riscv_th_vsuxb_v_u16m8_m (vbool2_t mask, uint16_t *a, vuint16m8_t indexed, vuint16m8_t value, size_t vl);
void __riscv_th_vsuxb_v_u32m1_m (vbool32_t mask, uint32_t *a, vuint32m1_t indexed, vuint32m1_t value, size_t vl);
void __riscv_th_vsuxb_v_u32m2_m (vbool16_t mask, uint32_t *a, vuint32m2_t indexed, vuint32m2_t value, size_t vl);
void __riscv_th_vsuxb_v_u32m4_m (vbool8_t mask, uint32_t *a, vuint32m4_t indexed, vuint32m4_t value, size_t vl);
void __riscv_th_vsuxb_v_u32m8_m (vbool4_t mask, uint32_t *a, vuint32m8_t indexed, vuint32m8_t value, size_t vl);
/*
 * Masked vsxh intrinsics for signed element types (i16/i32, LMUL 1-8).
 * Signature: void f(mask, a, indexed, value, vl).
 *   - mask:    vboolN_t, where N = SEW / LMUL in every declaration below.
 *   - a:       pointer to the scalar element type named in the suffix.
 *   - indexed: unsigned vector with the same SEW and LMUL as value.
 *   - value:   signed vector of the element type named in the suffix.
 *   - vl:      size_t.
 * NOTE(review): presumably the ordered indexed store of 16-bit memory
 * elements (vsxh.v), with `indexed` as byte offsets from `a` and only
 * mask-enabled elements written - confirm against the XTheadVector spec.
 */
void __riscv_th_vsxh_v_i16m1_m (vbool16_t mask, int16_t *a, vuint16m1_t indexed, vint16m1_t value, size_t vl);
void __riscv_th_vsxh_v_i16m2_m (vbool8_t mask, int16_t *a, vuint16m2_t indexed, vint16m2_t value, size_t vl);
void __riscv_th_vsxh_v_i16m4_m (vbool4_t mask, int16_t *a, vuint16m4_t indexed, vint16m4_t value, size_t vl);
void __riscv_th_vsxh_v_i16m8_m (vbool2_t mask, int16_t *a, vuint16m8_t indexed, vint16m8_t value, size_t vl);
void __riscv_th_vsxh_v_i32m1_m (vbool32_t mask, int32_t *a, vuint32m1_t indexed, vint32m1_t value, size_t vl);
void __riscv_th_vsxh_v_i32m2_m (vbool16_t mask, int32_t *a, vuint32m2_t indexed, vint32m2_t value, size_t vl);
void __riscv_th_vsxh_v_i32m4_m (vbool8_t mask, int32_t *a, vuint32m4_t indexed, vint32m4_t value, size_t vl);
void __riscv_th_vsxh_v_i32m8_m (vbool4_t mask, int32_t *a, vuint32m8_t indexed, vint32m8_t value, size_t vl);
/*
 * Masked vsuxh intrinsics for signed element types (i16/i32, LMUL 1-8).
 * Signature: void f(mask, a, indexed, value, vl).
 *   - mask:    vboolN_t, where N = SEW / LMUL in every declaration below.
 *   - a:       pointer to the scalar element type named in the suffix.
 *   - indexed: unsigned vector with the same SEW and LMUL as value.
 *   - value:   signed vector of the element type named in the suffix.
 *   - vl:      size_t.
 * NOTE(review): presumably the unordered indexed store of 16-bit memory
 * elements (vsuxh.v), with `indexed` as byte offsets from `a` and only
 * mask-enabled elements written - confirm against the XTheadVector spec.
 */
void __riscv_th_vsuxh_v_i16m1_m (vbool16_t mask, int16_t *a, vuint16m1_t indexed, vint16m1_t value, size_t vl);
void __riscv_th_vsuxh_v_i16m2_m (vbool8_t mask, int16_t *a, vuint16m2_t indexed, vint16m2_t value, size_t vl);
void __riscv_th_vsuxh_v_i16m4_m (vbool4_t mask, int16_t *a, vuint16m4_t indexed, vint16m4_t value, size_t vl);
void __riscv_th_vsuxh_v_i16m8_m (vbool2_t mask, int16_t *a, vuint16m8_t indexed, vint16m8_t value, size_t vl);
void __riscv_th_vsuxh_v_i32m1_m (vbool32_t mask, int32_t *a, vuint32m1_t indexed, vint32m1_t value, size_t vl);
void __riscv_th_vsuxh_v_i32m2_m (vbool16_t mask, int32_t *a, vuint32m2_t indexed, vint32m2_t value, size_t vl);
void __riscv_th_vsuxh_v_i32m4_m (vbool8_t mask, int32_t *a, vuint32m4_t indexed, vint32m4_t value, size_t vl);
void __riscv_th_vsuxh_v_i32m8_m (vbool4_t mask, int32_t *a, vuint32m8_t indexed, vint32m8_t value, size_t vl);
/*
 * Masked vsxh intrinsics for unsigned element types (u16/u32, LMUL 1-8).
 * Signature: void f(mask, a, indexed, value, vl).
 *   - mask:    vboolN_t, where N = SEW / LMUL in every declaration below.
 *   - a:       pointer to the scalar element type named in the suffix.
 *   - indexed: unsigned vector with the same SEW and LMUL as value.
 *   - value:   unsigned vector of the element type named in the suffix.
 *   - vl:      size_t.
 * NOTE(review): presumably the ordered indexed store of 16-bit memory
 * elements (vsxh.v), with `indexed` as byte offsets from `a` and only
 * mask-enabled elements written - confirm against the XTheadVector spec.
 */
void __riscv_th_vsxh_v_u16m1_m (vbool16_t mask, uint16_t *a, vuint16m1_t indexed, vuint16m1_t value, size_t vl);
void __riscv_th_vsxh_v_u16m2_m (vbool8_t mask, uint16_t *a, vuint16m2_t indexed, vuint16m2_t value, size_t vl);
void __riscv_th_vsxh_v_u16m4_m (vbool4_t mask, uint16_t *a, vuint16m4_t indexed, vuint16m4_t value, size_t vl);
void __riscv_th_vsxh_v_u16m8_m (vbool2_t mask, uint16_t *a, vuint16m8_t indexed, vuint16m8_t value, size_t vl);
void __riscv_th_vsxh_v_u32m1_m (vbool32_t mask, uint32_t *a, vuint32m1_t indexed, vuint32m1_t value, size_t vl);
void __riscv_th_vsxh_v_u32m2_m (vbool16_t mask, uint32_t *a, vuint32m2_t indexed, vuint32m2_t value, size_t vl);
void __riscv_th_vsxh_v_u32m4_m (vbool8_t mask, uint32_t *a, vuint32m4_t indexed, vuint32m4_t value, size_t vl);
void __riscv_th_vsxh_v_u32m8_m (vbool4_t mask, uint32_t *a, vuint32m8_t indexed, vuint32m8_t value, size_t vl);
/*
 * Masked vsuxh intrinsics for unsigned element types (u16/u32, LMUL 1-8).
 * Signature: void f(mask, a, indexed, value, vl).
 *   - mask:    vboolN_t, where N = SEW / LMUL in every declaration below.
 *   - a:       pointer to the scalar element type named in the suffix.
 *   - indexed: unsigned vector with the same SEW and LMUL as value.
 *   - value:   unsigned vector of the element type named in the suffix.
 *   - vl:      size_t.
 * NOTE(review): presumably the unordered indexed store of 16-bit memory
 * elements (vsuxh.v), with `indexed` as byte offsets from `a` and only
 * mask-enabled elements written - confirm against the XTheadVector spec.
 */
void __riscv_th_vsuxh_v_u16m1_m (vbool16_t mask, uint16_t *a, vuint16m1_t indexed, vuint16m1_t value, size_t vl);
void __riscv_th_vsuxh_v_u16m2_m (vbool8_t mask, uint16_t *a, vuint16m2_t indexed, vuint16m2_t value, size_t vl);
void __riscv_th_vsuxh_v_u16m4_m (vbool4_t mask, uint16_t *a, vuint16m4_t indexed, vuint16m4_t value, size_t vl);
void __riscv_th_vsuxh_v_u16m8_m (vbool2_t mask, uint16_t *a, vuint16m8_t indexed, vuint16m8_t value, size_t vl);
void __riscv_th_vsuxh_v_u32m1_m (vbool32_t mask, uint32_t *a, vuint32m1_t indexed, vuint32m1_t value, size_t vl);
void __riscv_th_vsuxh_v_u32m2_m (vbool16_t mask, uint32_t *a, vuint32m2_t indexed, vuint32m2_t value, size_t vl);
void __riscv_th_vsuxh_v_u32m4_m (vbool8_t mask, uint32_t *a, vuint32m4_t indexed, vuint32m4_t value, size_t vl);
void __riscv_th_vsuxh_v_u32m8_m (vbool4_t mask, uint32_t *a, vuint32m8_t indexed, vuint32m8_t value, size_t vl);
/*
 * Masked vsxw intrinsics for int32 elements (LMUL 1-8).
 * Signature: void f(mask, a, indexed, value, vl).
 *   - mask:    vboolN_t, where N = 32 / LMUL in every declaration below.
 *   - a:       int32_t pointer.
 *   - indexed: vuint32 vector with the same LMUL as value.
 *   - value:   vint32 vector.
 *   - vl:      size_t.
 * NOTE(review): presumably the ordered indexed store of 32-bit memory
 * elements (vsxw.v), with `indexed` as byte offsets from `a` and only
 * mask-enabled elements written - confirm against the XTheadVector spec.
 */
void __riscv_th_vsxw_v_i32m1_m (vbool32_t mask, int32_t *a, vuint32m1_t indexed, vint32m1_t value, size_t vl);
void __riscv_th_vsxw_v_i32m2_m (vbool16_t mask, int32_t *a, vuint32m2_t indexed, vint32m2_t value, size_t vl);
void __riscv_th_vsxw_v_i32m4_m (vbool8_t mask, int32_t *a, vuint32m4_t indexed, vint32m4_t value, size_t vl);
void __riscv_th_vsxw_v_i32m8_m (vbool4_t mask, int32_t *a, vuint32m8_t indexed, vint32m8_t value, size_t vl);
/*
 * Masked vsuxw intrinsics for int32 elements (LMUL 1-8).
 * Signature: void f(mask, a, indexed, value, vl).
 *   - mask:    vboolN_t, where N = 32 / LMUL in every declaration below.
 *   - a:       int32_t pointer.
 *   - indexed: vuint32 vector with the same LMUL as value.
 *   - value:   vint32 vector.
 *   - vl:      size_t.
 * NOTE(review): presumably the unordered indexed store of 32-bit memory
 * elements (vsuxw.v), with `indexed` as byte offsets from `a` and only
 * mask-enabled elements written - confirm against the XTheadVector spec.
 */
void __riscv_th_vsuxw_v_i32m1_m (vbool32_t mask, int32_t *a, vuint32m1_t indexed, vint32m1_t value, size_t vl);
void __riscv_th_vsuxw_v_i32m2_m (vbool16_t mask, int32_t *a, vuint32m2_t indexed, vint32m2_t value, size_t vl);
void __riscv_th_vsuxw_v_i32m4_m (vbool8_t mask, int32_t *a, vuint32m4_t indexed, vint32m4_t value, size_t vl);
void __riscv_th_vsuxw_v_i32m8_m (vbool4_t mask, int32_t *a, vuint32m8_t indexed, vint32m8_t value, size_t vl);
/*
 * Masked vsxw intrinsics for uint32 elements (LMUL 1-8).
 * Signature: void f(mask, a, indexed, value, vl).
 *   - mask:    vboolN_t, where N = 32 / LMUL in every declaration below.
 *   - a:       uint32_t pointer.
 *   - indexed: vuint32 vector with the same LMUL as value.
 *   - value:   vuint32 vector.
 *   - vl:      size_t.
 * NOTE(review): presumably the ordered indexed store of 32-bit memory
 * elements (vsxw.v), with `indexed` as byte offsets from `a` and only
 * mask-enabled elements written - confirm against the XTheadVector spec.
 */
void __riscv_th_vsxw_v_u32m1_m (vbool32_t mask, uint32_t *a, vuint32m1_t indexed, vuint32m1_t value, size_t vl);
void __riscv_th_vsxw_v_u32m2_m (vbool16_t mask, uint32_t *a, vuint32m2_t indexed, vuint32m2_t value, size_t vl);
void __riscv_th_vsxw_v_u32m4_m (vbool8_t mask, uint32_t *a, vuint32m4_t indexed, vuint32m4_t value, size_t vl);
void __riscv_th_vsxw_v_u32m8_m (vbool4_t mask, uint32_t *a, vuint32m8_t indexed, vuint32m8_t value, size_t vl);
/*
 * Masked vsuxw intrinsics for uint32 elements (LMUL 1-8).
 * Signature: void f(mask, a, indexed, value, vl).
 *   - mask:    vboolN_t, where N = 32 / LMUL in every declaration below.
 *   - a:       uint32_t pointer.
 *   - indexed: vuint32 vector with the same LMUL as value.
 *   - value:   vuint32 vector.
 *   - vl:      size_t.
 * NOTE(review): presumably the unordered indexed store of 32-bit memory
 * elements (vsuxw.v), with `indexed` as byte offsets from `a` and only
 * mask-enabled elements written - confirm against the XTheadVector spec.
 */
void __riscv_th_vsuxw_v_u32m1_m (vbool32_t mask, uint32_t *a, vuint32m1_t indexed, vuint32m1_t value, size_t vl);
void __riscv_th_vsuxw_v_u32m2_m (vbool16_t mask, uint32_t *a, vuint32m2_t indexed, vuint32m2_t value, size_t vl);
void __riscv_th_vsuxw_v_u32m4_m (vbool8_t mask, uint32_t *a, vuint32m4_t indexed, vuint32m4_t value, size_t vl);
void __riscv_th_vsuxw_v_u32m8_m (vbool4_t mask, uint32_t *a, vuint32m8_t indexed, vuint32m8_t value, size_t vl);
XTheadVector Integer Extract Intrinsics
/*
 * vext_x_v intrinsics for signed vectors (i8/i16/i32/i64, LMUL 1-8).
 * Each takes a signed vector `a` and an `unsigned idx`, and returns a scalar
 * of the vector's element type (the trailing suffix names the return type).
 * These declarations take no mask and no vl argument.
 * NOTE(review): presumably returns element `idx` of `a` (vext.x.v); the
 * result for an out-of-range `idx` is not stated here - confirm against the
 * XTheadVector spec.
 */
int8_t __riscv_th_vext_x_v_i8m1_i8 (vint8m1_t a, unsigned idx);
int8_t __riscv_th_vext_x_v_i8m2_i8 (vint8m2_t a, unsigned idx);
int8_t __riscv_th_vext_x_v_i8m4_i8 (vint8m4_t a, unsigned idx);
int8_t __riscv_th_vext_x_v_i8m8_i8 (vint8m8_t a, unsigned idx);
int16_t __riscv_th_vext_x_v_i16m1_i16 (vint16m1_t a, unsigned idx);
int16_t __riscv_th_vext_x_v_i16m2_i16 (vint16m2_t a, unsigned idx);
int16_t __riscv_th_vext_x_v_i16m4_i16 (vint16m4_t a, unsigned idx);
int16_t __riscv_th_vext_x_v_i16m8_i16 (vint16m8_t a, unsigned idx);
int32_t __riscv_th_vext_x_v_i32m1_i32 (vint32m1_t a, unsigned idx);
int32_t __riscv_th_vext_x_v_i32m2_i32 (vint32m2_t a, unsigned idx);
int32_t __riscv_th_vext_x_v_i32m4_i32 (vint32m4_t a, unsigned idx);
int32_t __riscv_th_vext_x_v_i32m8_i32 (vint32m8_t a, unsigned idx);
int64_t __riscv_th_vext_x_v_i64m1_i64 (vint64m1_t a, unsigned idx);
int64_t __riscv_th_vext_x_v_i64m2_i64 (vint64m2_t a, unsigned idx);
int64_t __riscv_th_vext_x_v_i64m4_i64 (vint64m4_t a, unsigned idx);
int64_t __riscv_th_vext_x_v_i64m8_i64 (vint64m8_t a, unsigned idx);
/*
 * vext_x_v intrinsics for unsigned vectors (u8/u16/u32/u64, LMUL 1-8).
 * Each takes an unsigned vector `a` and an `unsigned idx`, and returns a
 * scalar of the vector's element type (the trailing suffix names the return
 * type). These declarations take no mask and no vl argument.
 * NOTE(review): presumably returns element `idx` of `a` (vext.x.v); the
 * result for an out-of-range `idx` is not stated here - confirm against the
 * XTheadVector spec.
 */
uint8_t __riscv_th_vext_x_v_u8m1_u8 (vuint8m1_t a, unsigned idx);
uint8_t __riscv_th_vext_x_v_u8m2_u8 (vuint8m2_t a, unsigned idx);
uint8_t __riscv_th_vext_x_v_u8m4_u8 (vuint8m4_t a, unsigned idx);
uint8_t __riscv_th_vext_x_v_u8m8_u8 (vuint8m8_t a, unsigned idx);
uint16_t __riscv_th_vext_x_v_u16m1_u16 (vuint16m1_t a, unsigned idx);
uint16_t __riscv_th_vext_x_v_u16m2_u16 (vuint16m2_t a, unsigned idx);
uint16_t __riscv_th_vext_x_v_u16m4_u16 (vuint16m4_t a, unsigned idx);
uint16_t __riscv_th_vext_x_v_u16m8_u16 (vuint16m8_t a, unsigned idx);
uint32_t __riscv_th_vext_x_v_u32m1_u32 (vuint32m1_t a, unsigned idx);
uint32_t __riscv_th_vext_x_v_u32m2_u32 (vuint32m2_t a, unsigned idx);
uint32_t __riscv_th_vext_x_v_u32m4_u32 (vuint32m4_t a, unsigned idx);
uint32_t __riscv_th_vext_x_v_u32m8_u32 (vuint32m8_t a, unsigned idx);
uint64_t __riscv_th_vext_x_v_u64m1_u64 (vuint64m1_t a, unsigned idx);
uint64_t __riscv_th_vext_x_v_u64m2_u64 (vuint64m2_t a, unsigned idx);
uint64_t __riscv_th_vext_x_v_u64m4_u64 (vuint64m4_t a, unsigned idx);
uint64_t __riscv_th_vext_x_v_u64m8_u64 (vuint64m8_t a, unsigned idx);