Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into tagusekind
Browse files Browse the repository at this point in the history
  • Loading branch information
Endilll committed May 20, 2024
2 parents 9c89a7b + 82c5d35 commit 7955ff9
Show file tree
Hide file tree
Showing 305 changed files with 6,954 additions and 5,080 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/llvm-bugs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
runs-on: ubuntu-latest
if: github.repository == 'llvm/llvm-project'
steps:
- uses: actions/setup-node@v3
- uses: actions/setup-node@v4
with:
node-version: 18
check-latest: true
Expand Down
14 changes: 9 additions & 5 deletions bolt/lib/Passes/BinaryPasses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,8 @@ static uint64_t fixDoubleJumps(BinaryFunction &Function, bool MarkInvalid) {
MCPlusBuilder *MIB = Function.getBinaryContext().MIB.get();
for (BinaryBasicBlock &BB : Function) {
auto checkAndPatch = [&](BinaryBasicBlock *Pred, BinaryBasicBlock *Succ,
const MCSymbol *SuccSym) {
const MCSymbol *SuccSym,
std::optional<uint32_t> Offset) {
// Ignore infinite loop jumps or fallthrough tail jumps.
if (Pred == Succ || Succ == &BB)
return false;
Expand Down Expand Up @@ -715,9 +716,11 @@ static uint64_t fixDoubleJumps(BinaryFunction &Function, bool MarkInvalid) {
Pred->removeSuccessor(&BB);
Pred->eraseInstruction(Pred->findInstruction(Branch));
Pred->addTailCallInstruction(SuccSym);
MCInst *TailCall = Pred->getLastNonPseudoInstr();
assert(TailCall);
MIB->setOffset(*TailCall, BB.getOffset());
if (Offset) {
MCInst *TailCall = Pred->getLastNonPseudoInstr();
assert(TailCall);
MIB->setOffset(*TailCall, *Offset);
}
} else {
return false;
}
Expand Down Expand Up @@ -760,7 +763,8 @@ static uint64_t fixDoubleJumps(BinaryFunction &Function, bool MarkInvalid) {
if (Pred->getSuccessor() == &BB ||
(Pred->getConditionalSuccessor(true) == &BB && !IsTailCall) ||
Pred->getConditionalSuccessor(false) == &BB)
if (checkAndPatch(Pred, Succ, SuccSym) && MarkInvalid)
if (checkAndPatch(Pred, Succ, SuccSym, MIB->getOffset(*Inst)) &&
MarkInvalid)
BB.markValid(BB.pred_size() != 0 || BB.isLandingPad() ||
BB.isEntryPoint());
}
Expand Down
3 changes: 3 additions & 0 deletions bolt/lib/Profile/YAMLProfileReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,9 @@ bool YAMLProfileReader::parseFunctionProfile(
FuncRawBranchCount += YamlSI.Count;
BF.setRawBranchCount(FuncRawBranchCount);

if (BF.empty())
return true;

if (!opts::IgnoreHash &&
YamlBF.Hash != BF.computeHash(IsDFSOrder, HashFunction)) {
if (opts::Verbosity >= 1)
Expand Down
8 changes: 4 additions & 4 deletions bolt/test/X86/bb-with-two-tail-calls.s
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
# This reproduces a bug with dynostats when trying to compute branch stats
# at a block with two tails calls (one conditional and one unconditional).

# REQUIRES: system-linux

# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
# RUN: %s -o %t.o
# RUN: link_fdata %s %t.o %t.fdata
Expand All @@ -13,7 +11,7 @@
# CHECK-NOT: Assertion `BranchInfo.size() == 2 && "could only be called for blocks with 2 successors"' failed.
# Two tail calls in the same basic block after SCTC:
# CHECK: {{.*}}: ja {{.*}} # TAILCALL # Offset: 7 # CTCTakenCount: 4
# CHECK-NEXT: {{.*}}: jmp {{.*}} # TAILCALL # Offset: 12
# CHECK-NEXT: {{.*}}: jmp {{.*}} # TAILCALL # Offset: 13

.globl _start
_start:
Expand All @@ -23,7 +21,9 @@ a: ja b
x: ret
# FDATA: 1 _start #a# 1 _start #b# 2 4
b: jmp e
c: jmp f
c:
.nops 1
jmp f

.globl e
e:
Expand Down
71 changes: 71 additions & 0 deletions bolt/test/X86/yaml-non-simple.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
## Check that YAML profile for non-simple function is not reported as stale.

# RUN: split-file %s %t
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o
# RUN: %clang %cflags %t.o -o %t.exe -nostdlib
# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml --profile-ignore-hash -v=1 \
# RUN: --report-stale 2>&1 | FileCheck %s

# CHECK: BOLT-INFO: could not disassemble function main. Will ignore.
# CHECK: BOLT-INFO: could not disassemble function main.cold. Will ignore.
# CHECK: BOLT-INFO: 0 out of 2 functions in the binary (0.0%) have non-empty execution profile
# CHECK: BOLT-INFO: 1 function with profile could not be optimized

#--- main.s
.globl main
.type main, @function
main:
.cfi_startproc
.LBB00:
pushq %rbp
movq %rsp, %rbp
subq $16, %rsp
testq %rax, %rax
js .LBB03
.LBB01:
jne .LBB04
.LBB02:
nop
.LBB03:
xorl %eax, %eax
addq $16, %rsp
popq %rbp
retq
.LBB04:
xorl %eax, %eax
addq $16, %rsp
popq %rbp
retq
.cfi_endproc
.size main, .-main

.globl main.cold
.type main.cold, @function
main.cold:
.cfi_startproc
nop
.cfi_endproc
.size main.cold, .-main.cold

#--- yaml
---
header:
profile-version: 1
binary-name: 'yaml-non-simple.s.tmp.exe'
binary-build-id: '<unknown>'
profile-flags: [ lbr ]
profile-origin: branch profile reader
profile-events: ''
dfs-order: false
hash-func: xxh3
functions:
- name: main
fid: 0
hash: 0x0000000000000000
exec: 1
nblocks: 5
blocks:
- bid: 1
insns: 1
succ: [ { bid: 3, cnt: 1} ]
...
6 changes: 2 additions & 4 deletions clang-tools-extra/modularize/ModularizeUtilities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -435,11 +435,9 @@ static std::string replaceDotDot(StringRef Path) {
llvm::sys::path::const_iterator B = llvm::sys::path::begin(Path),
E = llvm::sys::path::end(Path);
while (B != E) {
if (B->compare(".") == 0) {
}
else if (B->compare("..") == 0)
if (*B == "..")
llvm::sys::path::remove_filename(Buffer);
else
else if (*B != ".")
llvm::sys::path::append(Buffer, *B);
++B;
}
Expand Down
137 changes: 137 additions & 0 deletions clang/docs/HLSL/AvailabilityDiagnostics.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
=============================
HLSL Availability Diagnostics
=============================

.. contents::
:local:

Introduction
============

HLSL availability diagnostics emit errors or warnings when unavailable shader APIs are used. Unavailable shader APIs are APIs that are exposed in HLSL code but are not available in the target shader stage or shader model version.

There are three modes of HLSL availability diagnostic:

#. **Default mode** - compiler emits an error when an unavailable API is found in code that is reachable from the shader entry point function or from an exported library function (when compiling a shader library).

#. **Relaxed mode** - same as default mode except the compiler emits a warning. This mode is enabled by ``-Wno-error=hlsl-availability``.

#. **Strict mode** - compiler emits an error when an unavailable API is found in parsed code regardless of whether it can be reached from the shader entry point or exported functions, or not. This mode is enabled by ``-fhlsl-strict-availability``.

Implementation Details
======================

Environment Parameter
---------------------

In order to encode API availability based on the shader model version and shader model stage a new ``environment`` parameter was added to the existing Clang ``availability`` attribute.

The values allowed for this parameter are a subset of values allowed as the ``llvm::Triple`` environment component. If the environment parameter is present, the declared availability attribute applies only to targets with the same platform and environment.

Default and Relaxed Diagnostic Modes
------------------------------------

Both modes are implemented in the ``DiagnoseHLSLAvailability`` class in ``SemaHLSL.cpp``, which is invoked after the whole translation unit is parsed (from ``Sema::ActOnEndOfTranslationUnit``). The implementation iterates over all shader entry points and exported library functions in the translation unit and performs an AST traversal of each function body.

When a reference to another function or member method is found (``DeclRefExpr`` or ``MemberExpr``) and it has a body, the AST of the referenced function is also scanned. This chain of AST traversals will reach all of the code that is reachable from the initial shader entry point or exported library function and avoids the need to generate a call graph.

All shader APIs have an availability attribute that specifies the shader model version (and environment, if applicable) when this API was first introduced. When a reference to a function without a definition is found and it has an availability attribute, the version of the attribute is checked against the target shader model version and shader stage (if shader stage context is known), and an appropriate diagnostic is generated as needed.

All shader entry functions have ``HLSLShaderAttr`` attribute that specifies what type of shader this function represents. However, for exported library functions the target shader stage is unknown, so in this case the HLSL API availability will be only checked against the shader model version. It means that for exported library functions the diagnostic of APIs with availability specific to shader stage will be deferred until DXIL linking time.

A list of functions that were already scanned is kept in order to avoid duplicate scans and diagnostics (see ``DiagnoseHLSLAvailability::ScannedDecls``). It might happen that a shader library has multiple shader entry points for different shader stages that all call into the same shared function. It is therefore important to record not just that a function has been scanned, but also in which shader stage context. This is done by using a ``llvm::DenseMap`` that maps ``FunctionDecl *`` to an ``unsigned`` bitmap that represents a set of shader stages (or environments) the function has been scanned for. The ``N``'th bit in the set is set if the function has been scanned in shader environment whose ``HLSLShaderAttr::ShaderType`` integer value equals ``N``.

The emitted diagnostic messages belong to the ``hlsl-availability`` diagnostic group and are reported as errors by default. With the ``-Wno-error=hlsl-availability`` flag they become warnings, which enables the relaxed HLSL diagnostic mode.

Strict Diagnostic Mode
----------------------

When strict HLSL availability diagnostic mode is enabled the compiler must report all HLSL API availability issues regardless of code reachability. The implementation of this mode takes advantage of an existing diagnostic scan in ``DiagnoseUnguardedAvailability`` class which is already traversing AST of each function as soon as the function body has been parsed. For HLSL, this pass was only slightly modified, such as making sure diagnostic messages are in the ``hlsl-availability`` group and that availability checks based on shader stage are not included if the shader stage context is unknown.

If the compilation target is a shader library, only availability based on shader model version can be diagnosed during this scan. To diagnose availability based on shader stage, the compiler needs to run the AST traversals implemented in ``DiagnoseHLSLAvailability`` at the end of the translation unit as described above.

As a result, availability based on specific shader stage will only be diagnosed in code that is reachable from a shader entry point or library export function. It also means that function bodies might be scanned multiple times. When that happens, care should be taken not to produce duplicated diagnostics.

========
Examples
========

**Note**
For the example below, the ``WaveActiveCountBits`` API function became available in shader model 6.0 and ``WaveMultiPrefixSum`` in shader model 6.5.

The availability of ``ddx`` function depends on a shader stage. It is available for pixel shaders in shader model 2.1 and higher, for compute, mesh and amplification shaders in shader model 6.6 and higher. For any other shader stages it is not available.

Compute shader example
======================

.. code-block:: c++

float unusedFunction(float f) {
return ddx(f);
}

[numthreads(4, 4, 1)]
void main(uint3 threadId : SV_DispatchThreadId) {
float f1 = ddx(threadId.x);
float f2 = WaveActiveCountBits(threadId.y == 1.0);
}

When compiled as a compute shader for shader model version 5.0, Clang will emit the following errors by default:

.. code-block:: console

<>:7:13: error: 'ddx' is only available in compute shader environment on Shader Model 6.6 or newer
<>:8:13: error: 'WaveActiveCountBits' is only available on Shader Model 6.0 or newer

With relaxed diagnostic mode these errors will become warnings.

With strict diagnostic mode, in addition to the 2 errors above Clang will also emit an error for the ``ddx`` call in ``unusedFunction``:

.. code-block:: console

<>:2:9: error: 'ddx' is only available in compute shader environment on Shader Model 6.6 or newer
<>:7:13: error: 'ddx' is only available in compute shader environment on Shader Model 6.6 or newer
<>:8:13: error: 'WaveActiveCountBits' is only available on Shader Model 6.0 or newer

Shader library example
======================

.. code-block:: c++

float myFunction(float f) {
return ddx(f);
}

float unusedFunction(float f) {
return WaveMultiPrefixSum(f, 1.0);
}

[shader("compute")]
[numthreads(4, 4, 1)]
void main(uint3 threadId : SV_DispatchThreadId) {
float f = 3;
float e = myFunction(f);
}

[shader("pixel")]
void main() {
float f = 3;
float e = myFunction(f);
}

When compiled as a shader library for shader model version 6.4, Clang will emit the following error by default:

.. code-block:: console

<>:2:9: error: 'ddx' is only available in compute shader environment on Shader Model 6.6 or newer

With relaxed diagnostic mode this error will become a warning.

With strict diagnostic mode Clang will also emit errors for availability issues in code that is not used by any of the entry points:

.. code-block:: console

<>:2:9: error: 'ddx' is only available in compute shader environment on Shader Model 6.6 or newer
<>:6:9: error: 'WaveMultiPrefixSum' is only available on Shader Model 6.5 or newer

Note that ``myFunction`` is reachable from both pixel and compute shader entry points and is therefore scanned twice - once for each context. The diagnostic is emitted only for the compute shader context.
1 change: 1 addition & 0 deletions clang/docs/HLSL/HLSLDocs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ HLSL Design and Implementation
ResourceTypes
EntryFunctions
FunctionCalls
AvailabilityDiagnostics
24 changes: 4 additions & 20 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -317,8 +317,7 @@ New Compiler Flags

- ``-fexperimental-late-parse-attributes`` enables an experimental feature to
allow late parsing certain attributes in specific contexts where they would
not normally be late parsed. Currently this allows late parsing the
`counted_by` attribute in C. See `Attribute Changes in Clang`_.
not normally be late parsed.

- ``-fseparate-named-sections`` uses separate unique sections for global
symbols in named special sections (i.e. symbols annotated with
Expand Down Expand Up @@ -407,24 +406,6 @@ Attribute Changes in Clang
- The ``clspv_libclc_builtin`` attribute has been added to allow clspv
(`OpenCL-C to Vulkan SPIR-V compiler <https://github.com/google/clspv>`_) to identify functions coming from libclc
(`OpenCL-C builtin library <https://libclc.llvm.org>`_).
- The ``counted_by`` attribute is now allowed on pointers that are members of a
struct in C.

- The ``counted_by`` attribute can now be late parsed in C when
``-fexperimental-late-parse-attributes`` is passed but only when attribute is
used in the declaration attribute position. This allows using the
attribute on existing code where it previously impossible to do so without
re-ordering struct field declarations would break ABI as shown below.

.. code-block:: c
struct BufferTy {
/* Refering to `count` requires late parsing */
char* buffer __counted_by(count);
/* Swapping `buffer` and `count` to avoid late parsing would break ABI */
size_t count;
};

Improvements to Clang's diagnostics
-----------------------------------
Expand Down Expand Up @@ -605,6 +586,9 @@ Bug Fixes in This Version
- Clang now correctly disallows VLA type compound literals, e.g. ``(int[size]){}``,
as the C standard mandates. (#GH89835)

- ``__is_array`` and ``__is_bounded_array`` no longer return ``true`` for
zero-sized arrays. Fixes (#GH54705).

Bug Fixes to Compiler Builtins
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down
1 change: 0 additions & 1 deletion clang/include/clang/AST/Type.h
Original file line number Diff line number Diff line change
Expand Up @@ -2515,7 +2515,6 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase {
bool isRecordType() const;
bool isClassType() const;
bool isStructureType() const;
bool isStructureTypeWithFlexibleArrayMember() const;
bool isObjCBoxableRecordType() const;
bool isInterfaceType() const;
bool isStructureOrClassType() const;
Expand Down
Loading

0 comments on commit 7955ff9

Please sign in to comment.