Skip to content

Commit

Permalink
Backport arm asm improvements from dav1d 1.4.2 (#1300)
Browse files: browse the repository at this point in the history
Improvements include implementations of various functions using the
`dotprod` and `i8mm` ISA extensions.
  • Loading branch information
fbossen authored Jul 17, 2024
2 parents 068be6b + 6b87a77 commit b26781a
Show file tree
Hide file tree
Showing 8 changed files with 1,960 additions and 197 deletions.
24 changes: 21 additions & 3 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,11 @@ mod asm {
if let Arch::Arm(arch) = arch {
define(Define::bool("ARCH_ARM", arch == ArchArm::Arm32));
define(Define::bool("ARCH_AARCH64", arch == ArchArm::Arm64));

if arch == ArchArm::Arm64 {
define(Define::bool("HAVE_DOTPROD", true));
define(Define::bool("HAVE_I8MM", true));
}
}

if let Arch::X86(arch) = arch {
Expand Down Expand Up @@ -199,6 +204,7 @@ mod asm {
][..];

let arm_generic = &["itx", "msac", "refmvs", "looprestoration_common"][..];
let arm_dotprod = &["mc_dotprod"][..];
let arm_bpc8 = &[
"cdef",
"filmgrain",
Expand Down Expand Up @@ -243,11 +249,20 @@ mod asm {
#[cfg(feature = "bitdepth_16")]
arm_bpc16,
][..];
let arm64_all = &[
arm_generic,
arm_dotprod,
#[cfg(feature = "bitdepth_8")]
arm_bpc8,
#[cfg(feature = "bitdepth_16")]
arm_bpc16,
][..];

let asm_file_names = match arch {
Arch::X86(ArchX86::X86_32) => x86_all,
Arch::X86(ArchX86::X86_64) => x86_64_all,
Arch::Arm(..) => arm_all,
Arch::Arm(ArchArm::Arm32) => arm_all,
Arch::Arm(ArchArm::Arm64) => arm64_all,
};

let asm_file_dir = match arch {
Expand Down Expand Up @@ -293,8 +308,11 @@ mod asm {
}
cc.compile(rav1dasm);
} else {
cc::Build::new()
.files(asm_file_paths)
let mut cc = cc::Build::new();
if arch == Arch::Arm(ArchArm::Arm64) {
cc.flag("-march=armv8.6-a");
}
cc.files(asm_file_paths)
.include(".")
.include(&out_dir)
.debug(cfg!(debug_assertions))
Expand Down
10 changes: 8 additions & 2 deletions include/common/bitdepth.rs
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,10 @@ macro_rules! bd_fn {
///
/// Similar to [`bd_fn!`] except that it selects which [`BitDepth`] `fn`
/// based on `$bpc:literal bpc` instead of `$BD:ty`.
#[cfg(all(feature = "asm", any(target_arch = "x86", target_arch = "x86_64")))]
#[cfg(all(
feature = "asm",
any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")
))]
macro_rules! bpc_fn {
($bpc:literal bpc, $name:ident, $asm:ident) => {{
use $crate::include::common::bitdepth::fn_identity;
Expand Down Expand Up @@ -487,7 +490,10 @@ macro_rules! fn_identity {
))]
pub(crate) use bd_fn;

#[cfg(all(feature = "asm", any(target_arch = "x86", target_arch = "x86_64")))]
#[cfg(all(
feature = "asm",
any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")
))]
pub(crate) use bpc_fn;

#[allow(unused)]
Expand Down
Loading

0 comments on commit b26781a

Please sign in to comment.