Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove 3DNow! from X86TargetParser. #99352

Merged
merged 5 commits into from
Jul 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion clang/lib/Headers/prfchwintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
*/

#if !defined(__X86INTRIN_H) && !defined(_MM3DNOW_H_INCLUDED)
#error "Never use <prfchwintrin.h> directly; include <x86intrin.h> or <mm3dnow.h> instead."
#error "Never use <prfchwintrin.h> directly; include <x86intrin.h> instead."
#endif

#ifndef __PRFCHWINTRIN_H
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGen/attr-target-x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ void __attribute__((target("avx10.1-512"))) avx10_1_512(void) {}
// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-avx,-avx10.1-256,-avx10.1-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-avx10.1-256,-avx10.1-512,-vaes"
// CHECK-NOT: tune-cpu
// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-3dnow,-3dnowa,-mmx"
// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-mmx"
// CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx"
// CHECK-NOT: tune-cpu
// CHECK: #8 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87" "tune-cpu"="sandybridge"
Expand Down
2 changes: 1 addition & 1 deletion compiler-rt/lib/builtins/cpu_model/x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ enum ProcessorFeatures {
FEATURE_AVX512VP2INTERSECT,
// FIXME: Below Features has some missings comparing to gcc, it's because gcc
// has some not one-to-one mapped in llvm.
FEATURE_3DNOW,
// FEATURE_3DNOW,
// FEATURE_3DNOWP,
FEATURE_ADX = 40,
// FEATURE_ABM,
Expand Down
8 changes: 6 additions & 2 deletions llvm/include/llvm/TargetParser/X86TargetParser.def
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,12 @@ X86_FEATURE_COMPAT(AVX512BF16, "avx512bf16", 34)
X86_FEATURE_COMPAT(AVX512VP2INTERSECT, "avx512vp2intersect", 35)
// Below Features has some missings comparing to gcc, it's because gcc has some
// not one-to-one mapped in llvm.
X86_FEATURE_COMPAT(3DNOW, "3dnow", 0)
X86_FEATURE (3DNOWA, "3dnowa")
Comment on lines -178 to -179
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@FreddyLeaf IIRC, we need to find two features to fill here, right?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, if gcc also deleted 3dnow, then find 1 feature to fill here. if not, then find 2.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I realized even gcc removed, they will keep one blank here to keep backward compatibility. So two features are required.
according to this comment above the file:

// We cannot re-adjust the position of X86_FEATURE_COMPAT at the whole list.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you can put sub-features of APX here if you do not have better candidates. egpr,ndd,push2pop2,ppx,ccmp,cf,nf,zu

Copy link
Contributor

@FreddyLeaf FreddyLeaf Jul 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A bad news is that only zu can be "used" here. all other sub-features of APX have already be "used."

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand this thread. What exactly needs to match what else?

Also, if it's important for each feature to be assigned a specific ABI-stable number for some reason, we should assign that number directly in this macro, not indirectly via ordering...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could I just put this in place of these two lines for now?

X86_FEATURE       (DUMMYFEATURE1,           "__dummyfeature1")
X86_FEATURE       (DUMMYFEATURE2,          "__dummyfeature2")

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've gone ahead and done that.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


// FIXME: dummy features were added to keep the numeric values of later features
// stable. Since the values need to be ABI stable, they should be changed to
// have explicitly assigned values, and then these dummy features removed.
X86_FEATURE (DUMMYFEATURE1, "__dummyfeature1")
X86_FEATURE (DUMMYFEATURE2, "__dummyfeature2")
X86_FEATURE_COMPAT(ADX, "adx", 0)
X86_FEATURE (64BIT, "64bit")
X86_FEATURE_COMPAT(CLDEMOTE, "cldemote", 0)
Expand Down
20 changes: 9 additions & 11 deletions llvm/lib/TargetParser/X86TargetParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,14 +171,14 @@ constexpr FeatureBitset FeaturesClearwaterforest =

// Geode Processor.
constexpr FeatureBitset FeaturesGeode =
FeatureX87 | FeatureCMPXCHG8B | FeatureMMX | Feature3DNOW | Feature3DNOWA;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to add a FeaturePREFTECH of some kind?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Possibly not?

We didn't enable it before (so, e.g. __PRFCHW__ isn't and wasn't defined), and while we DID enable the prefetchw instruction in the backend for these targets, we still do now (via implying it in X86.td).

OTOH, maybe it's weird to have that divergence, so I'd also be fine with adding it (and therefore setting the preprocessor define).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(I somewhat lean towards submitting the commit as-is, but will change this if you wish)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should add the define. If/when we eventually remove mm3dnow.h then the only way to access prfchwintrin.h is via x86intrin.h (which we already recommend) which can sometimes require the __PRFCHW__ define:

#if !defined(__SCE__) || __has_feature(modules) || defined(__PRFCHW__)
#include <prfchwintrin.h>
#endif

Also, we still have references to mm3dnow.h in prfchwintrin.h that probably need removing:

#if !defined(__X86INTRIN_H) && !defined(_MM3DNOW_H_INCLUDED)
#error "Never use <prfchwintrin.h> directly; include <x86intrin.h> or <mm3dnow.h> instead."
#endif

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, making the change.

I note that "sometimes" is essentially "if __SCE__ is defined". Really weird to have that be Sony-specific behavior.

FeatureX87 | FeatureCMPXCHG8B | FeatureMMX | FeaturePRFCHW;

// K6 processor.
constexpr FeatureBitset FeaturesK6 = FeatureX87 | FeatureCMPXCHG8B | FeatureMMX;

// K7 and K8 architecture processors.
constexpr FeatureBitset FeaturesAthlon =
FeatureX87 | FeatureCMPXCHG8B | FeatureMMX | Feature3DNOW | Feature3DNOWA;
FeatureX87 | FeatureCMPXCHG8B | FeatureMMX | FeaturePRFCHW;
constexpr FeatureBitset FeaturesAthlonXP =
FeaturesAthlon | FeatureFXSR | FeatureSSE;
constexpr FeatureBitset FeaturesK8 =
Expand Down Expand Up @@ -256,8 +256,8 @@ constexpr ProcInfo Processors[] = {
// i486-generation processors.
{ {"i486"}, CK_i486, ~0U, FeatureX87, '\0', false },
{ {"winchip-c6"}, CK_WinChipC6, ~0U, FeaturesPentiumMMX, '\0', false },
{ {"winchip2"}, CK_WinChip2, ~0U, FeaturesPentiumMMX | Feature3DNOW, '\0', false },
{ {"c3"}, CK_C3, ~0U, FeaturesPentiumMMX | Feature3DNOW, '\0', false },
{ {"winchip2"}, CK_WinChip2, ~0U, FeaturesPentiumMMX | FeaturePRFCHW, '\0', false },
{ {"c3"}, CK_C3, ~0U, FeaturesPentiumMMX | FeaturePRFCHW, '\0', false },
// i586-generation processors, P5 microarchitecture based.
{ {"i586"}, CK_i586, ~0U, FeatureX87 | FeatureCMPXCHG8B, '\0', false },
{ {"pentium"}, CK_Pentium, ~0U, FeatureX87 | FeatureCMPXCHG8B, 'B', false },
Expand Down Expand Up @@ -386,8 +386,8 @@ constexpr ProcInfo Processors[] = {
{ {"lakemont"}, CK_Lakemont, ~0U, FeatureCMPXCHG8B, '\0', false },
// K6 architecture processors.
{ {"k6"}, CK_K6, ~0U, FeaturesK6, '\0', false },
{ {"k6-2"}, CK_K6_2, ~0U, FeaturesK6 | Feature3DNOW, '\0', false },
{ {"k6-3"}, CK_K6_3, ~0U, FeaturesK6 | Feature3DNOW, '\0', false },
{ {"k6-2"}, CK_K6_2, ~0U, FeaturesK6 | FeaturePRFCHW, '\0', false },
{ {"k6-3"}, CK_K6_3, ~0U, FeaturesK6 | FeaturePRFCHW, '\0', false },
// K7 architecture processors.
{ {"athlon"}, CK_Athlon, ~0U, FeaturesAthlon, '\0', false },
{ {"athlon-tbird"}, CK_Athlon, ~0U, FeaturesAthlon, '\0', false },
Expand Down Expand Up @@ -493,6 +493,7 @@ constexpr FeatureBitset ImpliedFeaturesFXSR = {};
constexpr FeatureBitset ImpliedFeaturesINVPCID = {};
constexpr FeatureBitset ImpliedFeaturesLWP = {};
constexpr FeatureBitset ImpliedFeaturesLZCNT = {};
constexpr FeatureBitset ImpliedFeaturesMMX = {};
constexpr FeatureBitset ImpliedFeaturesMWAITX = {};
constexpr FeatureBitset ImpliedFeaturesMOVBE = {};
constexpr FeatureBitset ImpliedFeaturesMOVDIR64B = {};
Expand Down Expand Up @@ -520,6 +521,8 @@ constexpr FeatureBitset ImpliedFeaturesWBNOINVD = {};
constexpr FeatureBitset ImpliedFeaturesVZEROUPPER = {};
constexpr FeatureBitset ImpliedFeaturesX87 = {};
constexpr FeatureBitset ImpliedFeaturesXSAVE = {};
constexpr FeatureBitset ImpliedFeaturesDUMMYFEATURE1 = {};
constexpr FeatureBitset ImpliedFeaturesDUMMYFEATURE2 = {};

// Not really CPU features, but need to be in the table because clang uses
// target features to communicate them to the backend.
Expand All @@ -534,11 +537,6 @@ constexpr FeatureBitset ImpliedFeaturesXSAVEC = FeatureXSAVE;
constexpr FeatureBitset ImpliedFeaturesXSAVEOPT = FeatureXSAVE;
constexpr FeatureBitset ImpliedFeaturesXSAVES = FeatureXSAVE;

// MMX->3DNOW->3DNOWA chain.
constexpr FeatureBitset ImpliedFeaturesMMX = {};
constexpr FeatureBitset ImpliedFeatures3DNOW = FeatureMMX;
constexpr FeatureBitset ImpliedFeatures3DNOWA = Feature3DNOW;

// SSE/AVX/AVX512F chain.
constexpr FeatureBitset ImpliedFeaturesSSE = {};
constexpr FeatureBitset ImpliedFeaturesSSE2 = FeatureSSE;
Expand Down
Loading