Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for fixed-width 128-bit SVE implementation #503

Merged
merged 5 commits into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ option(ASTCENC_ISA_AVX2 "Enable astcenc builds for AVX2 SIMD")
option(ASTCENC_ISA_SSE41 "Enable astcenc builds for SSE4.1 SIMD")
option(ASTCENC_ISA_SSE2 "Enable astcenc builds for SSE2 SIMD")
option(ASTCENC_ISA_SVE_256 "Enable astcenc builds for 256-bit SVE SIMD")
option(ASTCENC_ISA_SVE_128 "Enable astcenc builds for 128-bit SVE SIMD")
option(ASTCENC_ISA_NEON "Enable astcenc builds for NEON SIMD")
option(ASTCENC_ISA_NONE "Enable astcenc builds for no SIMD")
option(ASTCENC_ISA_NATIVE "Enable astcenc builds for native SIMD")
Expand Down Expand Up @@ -87,7 +88,7 @@ endforeach()

# Count options which MUST be arm64
set(ASTCENC_ARM64_ISA_COUNT 0)
set(ASTCENC_CONFIGS ${ASTCENC_ISA_NEON} ${ASTCENC_ISA_SVE_256})
set(ASTCENC_CONFIGS ${ASTCENC_ISA_NEON} ${ASTCENC_ISA_SVE_128} ${ASTCENC_ISA_SVE_256})
foreach(ASTCENC_CONFIG ${ASTCENC_CONFIGS})
if(${ASTCENC_CONFIG})
math(EXPR ASTCENC_ARM64_ISA_COUNT "${ASTCENC_ARM64_ISA_COUNT} + 1")
Expand Down Expand Up @@ -120,6 +121,7 @@ endif()

message(STATUS "Arm backend options")
printopt("SVE 256b backend " ${ASTCENC_ISA_SVE_256})
printopt("SVE 128b backend " ${ASTCENC_ISA_SVE_128})
printopt("NEON backend " ${ASTCENC_ISA_NEON})
message(STATUS "x86-64 backend options")
printopt("AVX2 backend " ${ASTCENC_ISA_AVX2})
Expand Down
1 change: 1 addition & 0 deletions Docs/ChangeLog-4x.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ The 4.9.0 release is a minor maintenance release.
reference implementation.
* **Bug fix:** Fixed sincos table index under/overflow.
* **Feature:** Added backend for Arm SVE fixed-width 256-bit builds.
* **Feature:** Added backend for Arm SVE fixed-width 128-bit builds.
* **Feature:** Optimized NEON mask `any()` and `all()` functions.
* **Feature:** Migrated build and test to GitHub Actions pipelines.

Expand Down
6 changes: 4 additions & 2 deletions Source/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ else()
set(ASTCENC_CODEC enc)
endif()

set(ASTCENC_ARTIFACTS native none sve_256 neon avx2 sse4.1 sse2)
set(ASTCENC_CONFIGS ${ASTCENC_ISA_NATIVE} ${ASTCENC_ISA_NONE} ${ASTCENC_ISA_SVE_256} ${ASTCENC_ISA_NEON} ${ASTCENC_ISA_AVX2} ${ASTCENC_ISA_SSE41} ${ASTCENC_ISA_SSE2})
set(ASTCENC_ARTIFACTS native none sve_256 sve_128 neon avx2 sse4.1 sse2)
set(ASTCENC_CONFIGS ${ASTCENC_ISA_NATIVE} ${ASTCENC_ISA_NONE} ${ASTCENC_ISA_SVE_256} ${ASTCENC_ISA_SVE_128} ${ASTCENC_ISA_NEON} ${ASTCENC_ISA_AVX2} ${ASTCENC_ISA_SSE41} ${ASTCENC_ISA_SSE2})
list(LENGTH ASTCENC_ARTIFACTS ASTCENC_ARTIFACTS_LEN)
math(EXPR ASTCENC_ARTIFACTS_LEN "${ASTCENC_ARTIFACTS_LEN} - 1")

Expand All @@ -40,6 +40,8 @@ foreach(INDEX RANGE ${ASTCENC_ARTIFACTS_LEN})

if(${ASTCENC_ISA_SIMD} MATCHES "sve_256")
# Not supported on macOS
elseif(${ASTCENC_ISA_SIMD} MATCHES "sve_128")
# Not supported on macOS
elseif(${ASTCENC_ISA_SIMD} MATCHES "neon")
set(CMAKE_OSX_ARCHITECTURES arm64)
elseif(${ASTCENC_ISA_SIMD} MATCHES "sse2")
Expand Down
6 changes: 4 additions & 2 deletions Source/UnitTest/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
# under the License.
# ----------------------------------------------------------------------------

set(ASTCENC_ARTIFACTS native none sve_256 neon avx2 sse4.1 sse2)
set(ASTCENC_CONFIGS ${ASTCENC_ISA_NATIVE} ${ASTCENC_ISA_NONE} ${ASTCENC_ISA_SVE_256} ${ASTCENC_ISA_NEON} ${ASTCENC_ISA_AVX2} ${ASTCENC_ISA_SSE41} ${ASTCENC_ISA_SSE2})
set(ASTCENC_ARTIFACTS native none sve_256 sve_128 neon avx2 sse4.1 sse2)
set(ASTCENC_CONFIGS ${ASTCENC_ISA_NATIVE} ${ASTCENC_ISA_NONE} ${ASTCENC_ISA_SVE_256} ${ASTCENC_ISA_SVE_128} ${ASTCENC_ISA_NEON} ${ASTCENC_ISA_AVX2} ${ASTCENC_ISA_SSE41} ${ASTCENC_ISA_SSE2})
list(LENGTH ASTCENC_ARTIFACTS ASTCENC_ARTIFACTS_LEN)
math(EXPR ASTCENC_ARTIFACTS_LEN "${ASTCENC_ARTIFACTS_LEN} - 1")

Expand All @@ -28,6 +28,8 @@ foreach(INDEX RANGE ${ASTCENC_ARTIFACTS_LEN})

if(${ASTCENC_ISA_SIMD} MATCHES "sve_256")
# Not supported on macOS
elseif(${ASTCENC_ISA_SIMD} MATCHES "sve_128")
# Not supported on macOS
elseif(${ASTCENC_ISA_SIMD} MATCHES "neon")
set(CMAKE_OSX_ARCHITECTURES arm64)
elseif(${ASTCENC_ISA_SIMD} MATCHES "sse2")
Expand Down
15 changes: 15 additions & 0 deletions Source/UnitTest/cmake_core.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,21 @@ elseif(${ASTCENC_ISA_SIMD} MATCHES "sve_256")
PRIVATE
-march=armv8-a+sve -msve-vector-bits=256)

elseif(${ASTCENC_ISA_SIMD} MATCHES "sve_128")
target_compile_definitions(${ASTCENC_TEST}
PRIVATE
ASTCENC_NEON=1
ASTCENC_SVE=4
ASTCENC_SSE=0
ASTCENC_AVX=0
ASTCENC_POPCNT=0
ASTCENC_F16C=0)

# Enable SVE
target_compile_options(${ASTCENC_TEST}
PRIVATE
-march=armv8-a+sve -msve-vector-bits=128)

elseif(${ASTCENC_ISA_SIMD} MATCHES "sse2")
target_compile_definitions(${ASTCENC_TEST}
PRIVATE
Expand Down
2 changes: 2 additions & 0 deletions Source/astcenccli_toplevel_help.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,8 @@ void astcenc_print_header()
const char* simdtype = "sse2";
#elif (ASTCENC_SVE == 8)
const char* simdtype = "sve.256b";
#elif (ASTCENC_SVE == 4)
const char* simdtype = "sve.128b";
#elif (ASTCENC_NEON == 1)
const char* simdtype = "neon";
#else
Expand Down
23 changes: 23 additions & 0 deletions Source/cmake_core.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,29 @@ macro(astcenc_set_properties ASTCENC_TARGET_NAME ASTCENC_VENEER_TYPE)
-march=armv8-a+sve)
endif()

elseif(${ASTCENC_ISA_SIMD} MATCHES "sve_128")
target_compile_definitions(${ASTCENC_TARGET_NAME}
PRIVATE
ASTCENC_NEON=1
ASTCENC_SVE=4
ASTCENC_SSE=0
ASTCENC_AVX=0
ASTCENC_POPCNT=0
ASTCENC_F16C=0)

# Enable SVE in the core library
if (NOT ${ASTCENC_VENEER_TYPE})
target_compile_options(${ASTCENC_TARGET_NAME}
PRIVATE
-march=armv8-a+sve -msve-vector-bits=128)

# Enable SVE without fixed vector length in the veneer
elseif (${ASTCENC_VENEER_TYPE} EQUAL 2)
target_compile_options(${ASTCENC_TARGET_NAME}
PRIVATE
-march=armv8-a+sve)
endif()

elseif(${ASTCENC_ISA_SIMD} MATCHES "sse2")
target_compile_definitions(${ASTCENC_TARGET_NAME}
PRIVATE
Expand Down
6 changes: 3 additions & 3 deletions Test/astc_test_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,11 +306,11 @@ def parse_command_line():
"ref-2.5-neon", "ref-2.5-sse2", "ref-2.5-sse4.1", "ref-2.5-avx2",
"ref-3.7-neon", "ref-3.7-sse2", "ref-3.7-sse4.1", "ref-3.7-avx2",
"ref-4.8-neon", "ref-4.8-sse2", "ref-4.8-sse4.1", "ref-4.8-avx2",
"ref-main-neon", "ref-main-sve_256", "ref-main-sse2", "ref-main-sse4.1", "ref-main-avx2"]
"ref-main-neon", "ref-main-sve_256", "ref-main-sve_128", "ref-main-sse2", "ref-main-sse4.1", "ref-main-avx2"]

# All test encoders
testcoders = ["none", "neon", "sve_256", "sse2", "sse4.1", "avx2", "native", "universal"]
testcodersAArch64 = ["neon", "sve_256"]
testcoders = ["none", "neon", "sve_256", "sve_128", "sse2", "sse4.1", "avx2", "native", "universal"]
testcodersAArch64 = ["neon", "sve_256", "sve_128"]
testcodersX86 = ["sse2", "sse4.1", "avx2"]

coders = refcoders + testcoders + ["all-aarch64", "all-x86"]
Expand Down