From e8ce33e57cd034f1b539a8ea6afc80cc37614e15 Mon Sep 17 00:00:00 2001 From: Istvan Kiss Date: Wed, 19 Jun 2024 12:23:32 +0200 Subject: [PATCH] Initial commit --- docs/.gitignore | 2 + docs/conf.py | 20 +++++- docs/index.md | 1 + docs/sphinx/_toc.yml.in | 1 + docs/understand/programming_interface.rst | 75 +++++++++++++++++++++++ 5 files changed, 98 insertions(+), 1 deletion(-) create mode 100644 docs/understand/programming_interface.rst diff --git a/docs/.gitignore b/docs/.gitignore index 53b7787fbd..efa76a1b65 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -6,3 +6,5 @@ /doxygen/html /doxygen/xml /sphinx/_toc.yml +understand/user-kernel-space-compat-matrix-content.rst +understand/algorithm-libraries-support-matrix-content.rst \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index 3dec52d636..9a0c435127 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -6,6 +6,7 @@ import re from typing import Any, Dict, List +from urllib.request import urlretrieve from rocm_docs import ROCmDocs @@ -43,4 +44,21 @@ cpp_id_attributes = ["__global__", "__device__", "__host__", "__forceinline__", "static"] cpp_paren_attributes = ["__declspec"] -suppress_warnings = ["etoc.toctree"] \ No newline at end of file +suppress_warnings = ["etoc.toctree"] + +urlretrieve ( + "https://raw.githubusercontent.com/ROCm/rocm-install-on-linux/rocm-versioning-content/docs/reference/user-kernel-space-compat-matrix-content.rst", + "understand/user-kernel-space-compat-matrix-content.rst" +) + +urlretrieve ( + "https://raw.githubusercontent.com/ROCm/rocm-install-on-linux/rocm-versioning-content/docs/reference/algorithm-libraries-support-matrix-content.rst", + "understand/algorithm-libraries-support-matrix-content.rst" +) + +exclude_patterns = [ + "doxygen/mainpage.md", + "understand/glossary.md", + "understand/user-kernel-space-compat-matrix-content.rst", + "understand/algorithm-libraries-support-matrix-content.rst" +] \ No newline at end of file diff --git a/docs/index.md 
b/docs/index.md index b6fc4e912b..f32f1733aa 100644 --- a/docs/index.md +++ b/docs/index.md @@ -27,6 +27,7 @@ The CUDA enabled NVIDIA GPUs are supported by HIP. For more information, see [GP * {doc}`./understand/programming_model` * {doc}`./understand/programming_model_reference` +* {doc}`./understand/programming_interface` * {doc}`./understand/hardware_implementation` ::: diff --git a/docs/sphinx/_toc.yml.in b/docs/sphinx/_toc.yml.in index 775af34ce7..7aa5b1dd44 100644 --- a/docs/sphinx/_toc.yml.in +++ b/docs/sphinx/_toc.yml.in @@ -17,6 +17,7 @@ subtrees: entries: - file: understand/programming_model - file: understand/programming_model_reference + - file: understand/programming_interface - file: understand/hardware_implementation - caption: How to diff --git a/docs/understand/programming_interface.rst b/docs/understand/programming_interface.rst new file mode 100644 index 0000000000..a170298e96 --- /dev/null +++ b/docs/understand/programming_interface.rst @@ -0,0 +1,75 @@ +.. meta:: + :description: This chapter describes the HIP programming model, the contract + between the programmer and the compiler/runtime executing the + code. + :keywords: AMD, ROCm, HIP, CUDA, C++ language extensions + +******************************************************************************* +Programming interface +******************************************************************************* + +HIP defines a model for mapping single instruction, multiple threads (SIMT) programs +onto various architectures, primarily GPUs. While the model may be expressed +in most imperative languages (for example, Python via PyHIP), this document focuses on +the original C/C++ API of HIP. 
+ +HIP compilers +============= + +AMD platform compilers +---------------------- + +ROCm currently provides two compiler interfaces for compiling HIP programs: + +* ``/opt/rocm/bin/hipcc`` +* ``/opt/rocm/bin/amdclang++`` + +Both leverage the same LLVM compiler technology with AMD GCN GPU support; however, they offer a slightly different user experience. The ``hipcc`` command-line interface aims to provide a more familiar user interface to users who are experienced in CUDA but relatively new to the ROCm/HIP development environment. On the other hand, ``amdclang++`` provides a user interface identical to the clang++ compiler. It is more suitable for experienced developers who want to directly interact with the clang compiler and gain full control of their application’s build process. + +The major differences between ``hipcc`` and ``amdclang++`` are listed below: + +.. list-table:: + :header-rows: 1 + + * - + - ``hipcc`` + - ``amdclang++`` + + * - Compiling HIP source files. + - Treats all source files as HIP language source files. + - Enables the HIP language support for files with the ``.hip`` extension or through the ``-x hip`` compiler option. + + * - Detecting GPU architecture. + - Auto-detects the GPUs available on the system and generates code for those devices when no GPU architecture is specified. + - Has AMD GCN gfx803 as the default GPU architecture. The ``--offload-arch`` compiler option may be used to target other GPU architectures. + + * - Finding a HIP installation. + - Finds the HIP installation based on its own location and its knowledge about the ROCm directory structure. + - First looks for HIP under the same parent directory as its own LLVM directory and then falls back on ``/opt/rocm``. Users can use the ``--rocm-path`` option to instruct the compiler to use HIP from the specified ROCm installation. + + * - Linking to the HIP runtime library. + - Is configured to automatically link to the HIP runtime from the detected HIP installation. 
+ - Requires the ``--hip-link`` flag to be specified to link to the HIP runtime. Alternatively, users can use the ``-l -lamdhip64`` option to link to a HIP runtime library. + + * - Device function inlining. + - Inlines all GPU device functions, which provides greater performance and compatibility for code that contains file-scoped or device-function-scoped ``__shared__`` variables. However, it may increase compile time. + - Relies on inlining heuristics to control inlining. Users experiencing performance or compilation issues with code using file-scoped or device-function-scoped ``__shared__`` variables could try ``-mllvm -amdgpu-early-inline-all=true -mllvm -amdgpu-function-calls=false`` to work around the issue. There are plans to address these issues with future compiler improvements. + + * - Source code location. + - `HIPCC `_ + - `amdclang++ `_ + +For compiler options, see the `GPU compiler option page `_. + +NVIDIA platform compilers +------------------------- + +HIP versioning +=============== + +.. include:: algorithm-libraries-support-matrix-content.rst + +Linux kernel driver support +--------------------------- + +.. include:: user-kernel-space-compat-matrix-content.rst \ No newline at end of file