From 2c04baea1ea7733cbde0e0bbc81f2e58da6c4674 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Fri, 27 Oct 2023 10:13:22 -0700 Subject: [PATCH] `build.rs`: Don't compile `avx` asm for `x86`. The `avx` extensions (`avx2`, `avx512`) are only available on `x86_64`, not `x86`, so we don't need to compile them on `x86`; doing so only lengthens the build and bloats the binaries. `dav1d` doesn't seem to skip them on `x86` for some reason, but its C code gates `avx` asm calls behind `x86_64`, as ours does, too. For `ipred16`, `ipred16_avx2` defines a couple of constants used by `ipred16_sse`, so we still have to compile `ipred16_avx2` on `x86`. This is presumably a bug, and I'll try to upstream a fix for it. --- build.rs | 88 +++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 55 insertions(+), 33 deletions(-) diff --git a/build.rs b/build.rs index 20c718282..74649ee83 100644 --- a/build.rs +++ b/build.rs @@ -144,55 +144,62 @@ mod asm { let config_path = out_dir.join(config_file_name); fs::write(&config_path, &config_contents).unwrap(); - let x86_generic = &[ + // Note that avx* is never (at runtime) supported on x86. + let x86_generic = &["cdef_sse", "itx_sse", "msac", "refmvs"][..]; + let x86_64_generic = &[ "cdef_avx2", - "cdef_sse", "itx_avx2", "itx_avx512", - "itx_sse", "looprestoration_avx2", - "msac", - "refmvs", ][..]; let x86_bpc8 = &[ + "filmgrain_sse", + "ipred_sse", + "loopfilter_sse", + "looprestoration_sse", + "mc_sse", + ][..]; + let x86_64_bpc8 = &[ "cdef_avx512", "filmgrain_avx2", "filmgrain_avx512", - "filmgrain_sse", "ipred_avx2", "ipred_avx512", - "ipred_sse", "loopfilter_avx2", "loopfilter_avx512", - "loopfilter_sse", "looprestoration_avx512", - "looprestoration_sse", "mc_avx2", "mc_avx512", - "mc_sse", ][..]; let x86_bpc16 = &[ + "cdef16_sse", + "filmgrain16_sse", + "ipred16_sse", + "itx16_sse", + "loopfilter16_sse", + "looprestoration16_sse", + "mc16_sse", + // TODO(kkysen) avx2 shouldn't be in x86, + // but a const used in sse is defined in avx2 (a bug). 
+ "ipred16_avx2", + ][..]; + let x86_64_bpc16 = &[ "cdef16_avx2", "cdef16_avx512", - "cdef16_sse", "filmgrain16_avx2", "filmgrain16_avx512", - "filmgrain16_sse", - "ipred16_avx2", + // TODO(kkysen) avx2 should only be in x86_64, + // but a const used in sse is defined in avx2 (a bug). + // "ipred16_avx2", "ipred16_avx512", - "ipred16_sse", "itx16_avx2", "itx16_avx512", - "itx16_sse", "loopfilter16_avx2", "loopfilter16_avx512", - "loopfilter16_sse", "looprestoration16_avx2", "looprestoration16_avx512", - "looprestoration16_sse", "mc16_avx2", "mc16_avx512", - "mc16_sse", ][..]; let arm_generic = &["itx", "msac", "refmvs", "looprestoration_common"][..]; @@ -214,22 +221,37 @@ mod asm { "mc16", ][..]; - // TODO(kkysen) Should not compile avx on x86. + let x86_all = &[ + x86_generic, + #[cfg(feature = "bitdepth_8")] + x86_bpc8, + #[cfg(feature = "bitdepth_16")] + x86_bpc16, + ][..]; + let x86_64_all = &[ + x86_generic, + x86_64_generic, + #[cfg(feature = "bitdepth_8")] + x86_bpc8, + #[cfg(feature = "bitdepth_8")] + x86_64_bpc8, + #[cfg(feature = "bitdepth_16")] + x86_bpc16, + #[cfg(feature = "bitdepth_16")] + x86_64_bpc16, + ][..]; + let arm_all = &[ + arm_generic, + #[cfg(feature = "bitdepth_8")] + arm_bpc8, + #[cfg(feature = "bitdepth_16")] + arm_bpc16, + ][..]; + let asm_file_names = match arch { - Arch::X86(..) => [ - x86_generic, - #[cfg(feature = "bitdepth_8")] - x86_bpc8, - #[cfg(feature = "bitdepth_16")] - x86_bpc16, - ], - Arch::Arm(..) => [ - arm_generic, - #[cfg(feature = "bitdepth_8")] - arm_bpc8, - #[cfg(feature = "bitdepth_16")] - arm_bpc16, - ], + Arch::X86(ArchX86::X86_32) => x86_all, + Arch::X86(ArchX86::X86_64) => x86_64_all, + Arch::Arm(..) => arm_all, }; let asm_file_dir = match arch {