✨ feat(server): Add Intel/AMD GPU support (#84)

## Description ### This is a DRAFT - Changes will be discussed and made upon request! In a nutshell, this adds support for running Nestri with Intel and AMD GPUs, both integrated and dedicated. It took a few days to find a trick for having output without dummy plugs or connected displays, but I think I got it. `gpu-screen-recorder` requires a custom patch to skip the check for connected displays (as we're using an xrandr workaround which makes them stay "unconnected"). Most likely fixes #68 ### Changes The NVIDIA sections have been split into their own code branches since there are some NVIDIA-specific things I didn't feel it appropriate to poke at more than necessary for the goal of this PR. Added a script with helper functions related to GPU discovery and gathering some basic info from them (note: it might be better to declare the helper script arrays outside its initially run function). The helper scripts rely on `lshw`. NVIDIA code was slightly adjusted to use the bus IDs provided by the helper functions to have some code re-use. Cleaned up a few things on the side. --------- Co-authored-by: Kristian Ollikainen <[email protected]> Co-authored-by: Wanjohi <[email protected]>
nestriness · Jul 7, 2024 · cf69f6c · cf69f6c
1 parent b12b262
commit cf69f6c
Show file tree

Hide file tree

Showing 5 changed files with 441 additions and 60 deletions.
diff --git a/.patches/connectcheckskip.patch b/.patches/connectcheckskip.patch
@@ -0,0 +1,28 @@
+diff --git a/src/utils.c b/src/utils.c
+index e00f3c5..4f1f0bf 100644
+--- a/src/utils.c
++++ b/src/utils.c
+@@ -71,7 +71,7 @@ void for_each_active_monitor_output_x11(Display *display, active_monitor_callbac
+     char display_name[256];
+     for(int i = 0; i < screen_res->noutput; ++i) {
+         XRROutputInfo *out_info = XRRGetOutputInfo(display, screen_res, screen_res->outputs[i]);
+-        if(out_info && out_info->crtc && out_info->connection == RR_Connected) {
++        if(out_info && out_info->crtc) {
+             XRRCrtcInfo *crt_info = XRRGetCrtcInfo(display, screen_res, out_info->crtc);
+             if(crt_info && crt_info->mode) {
+                 const XRRModeInfo *mode_info = get_mode_info(screen_res, crt_info->mode);
+@@ -218,10 +218,10 @@ static void for_each_active_monitor_output_drm(const gsr_egl *egl, active_monito
+             if(connector_type)
+                 ++connector_type->count;
+
+-            if(connector->connection != DRM_MODE_CONNECTED) {
+-                drmModeFreeConnector(connector);
+-                continue;
+-            }
++            //if(connector->connection != DRM_MODE_CONNECTED) {
++            //    drmModeFreeConnector(connector);
++            //    continue;
++            //}
+
+             if(connector_type)
+                 ++connector_type->count_active;
diff --git a/.scripts/entrypoint.sh b/.scripts/entrypoint.sh
@@ -1,6 +1,9 @@
 #!/bin/bash -e
 trap "echo TRAPed signal" HUP INT QUIT TERM
 
+# Include our gpu helper functions
+source /etc/gpu_helpers.sh
+
 # Create and modify permissions of XDG_RUNTIME_DIR
 sudo -u nestri mkdir -pm700 /tmp/runtime-1000
 sudo chown nestri:nestri /tmp/runtime-1000
@@ -63,34 +66,8 @@ sudo /etc/init.d/dbus start
 # Install Proton-GE for this user
 nestri-proton -i
 
-# Install NVIDIA userspace driver components including X graphic libraries
-if ! command -v nvidia-xconfig &> /dev/null; then
-  # Driver version is provided by the kernel through the container toolkit
-  export DRIVER_ARCH="$(dpkg --print-architecture | sed -e 's/arm64/aarch64/' -e 's/armhf/32bit-ARM/' -e 's/i.*86/x86/' -e 's/amd64/x86_64/' -e 's/unknown/x86_64/')"
-  export DRIVER_VERSION="$(head -n1 </proc/driver/nvidia/version | awk '{for(i=1;i<=NF;i++) if ($i ~ /^[0-9]+\.[0-9\.]+/) {print $i; exit}}')"
-  cd /tmp
-  # If version is different, new installer will overwrite the existing components
-  if [ ! -f "/tmp/NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}.run" ]; then
-    # Check multiple sources in order to probe both consumer and datacenter driver versions
-    curl -fsSL -O "https://international.download.nvidia.com/XFree86/Linux-${DRIVER_ARCH}/${DRIVER_VERSION}/NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}.run" || curl -fsSL -O "https://international.download.nvidia.com/tesla/${DRIVER_VERSION}/NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}.run" || { echo "Failed NVIDIA GPU driver download. Exiting."; exit 1; }
-  fi
-  # Extract installer before installing
-  sudo sh "NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}.run" -x
-  cd "NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}"
-  # Run installation without the kernel modules and host components
-  sudo ./nvidia-installer --silent \
-                    --no-kernel-module \
-                    --install-compat32-libs \
-                    --no-nouveau-check \
-                    --no-nvidia-modprobe \
-                    --no-rpms \
-                    --no-backup \
-                    --no-check-for-alternate-installs
-  sudo rm -rf /tmp/NVIDIA* && cd ~
-fi
-
 # Allow starting Xorg from a pseudoterminal instead of strictly on a tty console
-if [ ! -f /etc/X11/Xwrapper.config ]; then
+if [ ! -f "/etc/X11/Xwrapper.config" ]; then
     echo -e "allowed_users=anybody\nneeds_root_rights=yes" | sudo tee /etc/X11/Xwrapper.config > /dev/null
 fi
 if grep -Fxq "allowed_users=console" /etc/X11/Xwrapper.config; then
@@ -102,22 +79,6 @@ if [ -f "/etc/X11/xorg.conf" ]; then
   sudo rm -f "/etc/X11/xorg.conf"
 fi
 
-# Get first GPU device if all devices are available or `NVIDIA_VISIBLE_DEVICES` is not set
-if [ "$NVIDIA_VISIBLE_DEVICES" == "all" ] || [ -z "$NVIDIA_VISIBLE_DEVICES" ]; then
-  export GPU_SELECT="$(sudo nvidia-smi --query-gpu=uuid --format=csv | sed -n 2p)"
-# Get first GPU device out of the visible devices in other situations
-else
-  export GPU_SELECT="$(sudo nvidia-smi --id=$(echo "$NVIDIA_VISIBLE_DEVICES" | cut -d ',' -f1) --query-gpu=uuid --format=csv | sed -n 2p)"
-  if [ -z "$GPU_SELECT" ]; then
-    export GPU_SELECT="$(sudo nvidia-smi --query-gpu=uuid --format=csv | sed -n 2p)"
-  fi
-fi
-
-if [ -z "$GPU_SELECT" ]; then
-  echo "No NVIDIA GPUs detected or nvidia-container-toolkit not configured. Exiting."
-  exit 1
-fi
-
 # Setting `VIDEO_PORT` to none disables RANDR/XRANDR, do not set this if using datacenter GPUs
 if [ "${VIDEO_PORT,,}" = "none" ]; then
   export CONNECTED_MONITOR="--use-display-device=None"
@@ -126,26 +87,127 @@ else
   export CONNECTED_MONITOR="--connected-monitor=${VIDEO_PORT}"
 fi
 
-# Bus ID from nvidia-smi is in hexadecimal format and should be converted to decimal format (including the domain) which Xorg understands, required because nvidia-xconfig doesn't work as intended in a container
-HEX_ID="$(sudo nvidia-smi --query-gpu=pci.bus_id --id="$GPU_SELECT" --format=csv | sed -n 2p)"
-IFS=":." ARR_ID=($HEX_ID)
-unset IFS
-BUS_ID="PCI:$((16#${ARR_ID[1]}))@$((16#${ARR_ID[0]})):$((16#${ARR_ID[2]})):$((16#${ARR_ID[3]}))"
 # A custom modeline should be generated because there is no monitor to fetch this information normally
-export MODELINE="$(cvt -r "${SIZEW}" "${SIZEH}" "${REFRESH}" | sed -n 2p)"
-# Generate /etc/X11/xorg.conf with nvidia-xconfig
-sudo nvidia-xconfig --virtual="${SIZEW}x${SIZEH}" --depth="$CDEPTH" --mode="$(echo "$MODELINE" | awk '{print $2}' | tr -d '\"')" --allow-empty-initial-configuration --no-probe-all-gpus --busid="$BUS_ID" --include-implicit-metamodes --mode-debug --no-sli --no-base-mosaic --only-one-x-screen ${CONNECTED_MONITOR}
-# Guarantee that the X server starts without a monitor by adding more options to the configuration
-sudo sed -i '/Driver\s\+"nvidia"/a\    Option         "ModeValidation" "NoMaxPClkCheck,NoEdidMaxPClkCheck,NoMaxSizeCheck,NoHorizSyncCheck,NoVertRefreshCheck,NoVirtualSizeCheck,NoExtendedGpuCapabilitiesCheck,NoTotalSizeCheck,NoDualLinkDVICheck,NoDisplayPortBandwidthCheck,AllowNon3DVisionModes,AllowNonHDMI3DModes,AllowNonEdidModes,NoEdidHDMI2Check,AllowDpInterlaced"' /etc/X11/xorg.conf
-# Add custom generated modeline to the configuration
-sudo sed -i '/Section\s\+"Monitor"/a\    '"$MODELINE" /etc/X11/xorg.conf
-# Prevent interference between GPUs, add this to the host or other containers running Xorg as well
-echo -e "Section \"ServerFlags\"\n    Option \"AutoAddGPU\" \"false\"\nEndSection" | sudo tee -a /etc/X11/xorg.conf > /dev/null
+custom_modeline="$(cvt -r "${SIZEW}" "${SIZEH}" "${REFRESH}" | sed -n 2p)"
+custom_modeline_settings="$(echo "$custom_modeline" | sed 's/Modeline //')"
+custom_modeline_identifier="$(echo "$custom_modeline_settings" | awk '{print $1}' | tr -d '"')"
+
+# Pre-populate GPU information manually
+if ! check_and_populate_gpus; then
+  exit 1
+fi
+
+# Select the GPU based on user input or first one available
+selected_gpu="${GPU_SELECTION,,:-}"
+if [[ -z "$selected_gpu" ]]; then
+  selected_gpu="${gpu_map[0]}" # Select first available GPU
+  echo "No GPU selected, using first one available: $selected_gpu"
+elif ! selected_gpu=$(check_selected_gpu "$selected_gpu"); then
+  exit 1
+fi
+
+# Print selected GPU information
+echo "Selected GPU: $(print_gpu_info "$selected_gpu")"
+echo ""
+
+# Get GPU vendor as separate variable
+selected_gpu_vendor=$(get_gpu_vendor "$selected_gpu")
+# Convert lshw gathered bus id into Xorg compatible one
+xorg_bus_id=$(get_gpu_bus_xorg "$selected_gpu")
+
+# Check if the selected GPU is an NVIDIA GPU
+if [[ "${selected_gpu_vendor,,}" =~ "nvidia" ]]; then
+    echo "Selected GPU is NVIDIA. Handling NVIDIA-specific configuration..."
+
+    # Install NVIDIA userspace driver components including X graphic libraries
+    if ! command -v nvidia-xconfig &> /dev/null; then
+      # Driver version is provided by the kernel through the container toolkit
+      export DRIVER_ARCH="$(dpkg --print-architecture | sed -e 's/arm64/aarch64/' -e 's/armhf/32bit-ARM/' -e 's/i.*86/x86/' -e 's/amd64/x86_64/' -e 's/unknown/x86_64/')"
+      export DRIVER_VERSION="$(head -n1 </proc/driver/nvidia/version | awk '{for(i=1;i<=NF;i++) if ($i ~ /^[0-9]+\.[0-9\.]+/) {print $i; exit}}')"
+      cd /tmp
+      # If version is different, new installer will overwrite the existing components
+      if [ ! -f "/tmp/NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}.run" ]; then
+        # Check multiple sources in order to probe both consumer and datacenter driver versions
+        curl -fsSL -O "https://international.download.nvidia.com/XFree86/Linux-${DRIVER_ARCH}/${DRIVER_VERSION}/NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}.run" || curl -fsSL -O "https://international.download.nvidia.com/tesla/${DRIVER_VERSION}/NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}.run" || { echo "Failed NVIDIA GPU driver download. Exiting."; exit 1; }
+      fi
+      # Extract installer before installing
+      sudo sh "NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}.run" -x
+      cd "NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}"
+      # Run installation without the kernel modules and host components
+      sudo ./nvidia-installer --silent \
+                        --no-kernel-module \
+                        --install-compat32-libs \
+                        --no-nouveau-check \
+                        --no-nvidia-modprobe \
+                        --no-rpms \
+                        --no-backup \
+                        --no-check-for-alternate-installs
+      sudo rm -rf /tmp/NVIDIA* && cd ~
+    fi
+
+    # Generate /etc/X11/xorg.conf with nvidia-xconfig
+    sudo nvidia-xconfig --virtual="${SIZEW}x${SIZEH}" --depth="$CDEPTH" --mode="$(echo "$custom_modeline" | awk '{print $2}' | tr -d '\"')" --allow-empty-initial-configuration --no-probe-all-gpus --busid="$xorg_bus_id" --include-implicit-metamodes --mode-debug --no-sli --no-base-mosaic --only-one-x-screen ${CONNECTED_MONITOR}
+    # Guarantee that the X server starts without a monitor by adding more options to the configuration
+    sudo sed -i '/Driver\s\+"nvidia"/a\    Option         "ModeValidation" "NoMaxPClkCheck,NoEdidMaxPClkCheck,NoMaxSizeCheck,NoHorizSyncCheck,NoVertRefreshCheck,NoVirtualSizeCheck,NoExtendedGpuCapabilitiesCheck,NoTotalSizeCheck,NoDualLinkDVICheck,NoDisplayPortBandwidthCheck,AllowNon3DVisionModes,AllowNonHDMI3DModes,AllowNonEdidModes,NoEdidHDMI2Check,AllowDpInterlaced"' /etc/X11/xorg.conf
+
+    # Add custom generated modeline to the configuration
+    sudo sed -i '/Section\s\+"Monitor"/a\    '"$custom_modeline" /etc/X11/xorg.conf
+    # Prevent interference between GPUs, add this to the host or other containers running Xorg as well
+    echo -e "Section \"ServerFlags\"\n    Option \"AutoAddGPU\" \"false\"\nEndSection" | sudo tee -a /etc/X11/xorg.conf > /dev/null
+else
+    echo "Selected GPU is non-NVIDIA. Handling common configuration..."
+
+    # We need permissions for the GPU(s)
+    sudo chown -R root:root /dev/dri/*
+    sudo chmod -R 777 /dev/dri/*
+
+    # Create common config file
+    sudo touch /etc/X11/xorg.conf
+    config_common_xorg="
+Section \"ServerLayout\"
+    Identifier     \"Layout0\"
+    Screen      0  \"Screen0\"
+    InputDevice    \"Keyboard0\" \"CoreKeyboard\"
+    InputDevice    \"Mouse0\" \"CorePointer\"
+EndSection
+
+Section \"InputDevice\"
+    Identifier     \"Mouse0\"
+    Driver         \"mouse\"
+    Option         \"Protocol\" \"auto\"
+    Option         \"Device\" \"/dev/mouse\"
+    Option         \"Emulate3Buttons\" \"no\"
+    Option         \"ZAxisMapping\" \"4 5\"
+EndSection
+
+Section \"InputDevice\"
+    Identifier     \"Keyboard0\"
+    Driver         \"kbd\"
+EndSection
+
+Section \"Device\"
+    Identifier     \"Device0\"
+    Driver         \"modesetting\"
+    BusID          \"$xorg_bus_id\"
+EndSection
+
+Section \"Screen\"
+    Identifier     \"Screen0\"
+    Device         \"Device0\"
+    Option         \"ModeDebug\" \"True\"
+EndSection
+
+Section \"ServerFLags\"
+    Option \"AutoAddGPU\" \"off\"
+EndSection
+"
+    echo "$config_common_xorg" | sudo tee /etc/X11/xorg.conf > /dev/null
+fi
 
 # Default display is :0 across the container
 export DISPLAY=":0"
 # Run Xorg server with required extensions
-/usr/bin/Xorg vt7 -noreset -novtswitch -sharevts -dpi "${DPI}" +extension "COMPOSITE" +extension "DAMAGE" +extension "GLX" +extension "RANDR" +extension "RENDER" +extension "MIT-SHM" +extension "XFIXES" +extension "XTEST" "${DISPLAY}" &
+/usr/bin/Xorg vt7 -noreset -novtswitch -sharevts -dpi "${DPI}" -fakescreenfps "${REFRESH}" +extension "COMPOSITE" +extension "DAMAGE" +extension "GLX" +extension "RANDR" +extension "RENDER" +extension "MIT-SHM" +extension "XFIXES" +extension "XTEST" "${DISPLAY}" &
 
 # Wait for X11 to start
 echo "Waiting for X socket"
@@ -157,10 +219,52 @@ echo "$(date +"[%Y-%m-%d %H:%M:%S]") Waiting for X socket"
 until [ -S "/tmp/.X11-unix/X${DISPLAY/:/}" ]; do sleep 1; done
 echo "$(date +"[%Y-%m-%d %H:%M:%S]") X socket is ready"
 
+# Additional non-NVIDIA configuration required: add the custom mode to an output through xrandr once X is up
+if [[ ! "${selected_gpu_vendor,,}" =~ "nvidia" ]]; then
+  # Get a list of all available outputs (connected or disconnected)
+  all_outputs=($(xrandr --query | awk '/ connected| disconnected/ {print $1}'))
+  configured_output="" # Set to the output name once one accepts the custom mode
+  for selected_output in "${all_outputs[@]}"; do
+    # Create a unique mode identifier by appending the output name
+    unique_mode_identifier="${selected_output}-${custom_modeline_identifier}"
+
+    # Create a unique modeline setting with the new identifier
+    unique_modeline_settings="$(echo "$custom_modeline_settings" | sed "s/$custom_modeline_identifier/$unique_mode_identifier/" | tr -d '"')"
+
+    # Check if the mode already exists for this output (avoid duplicates)
+    if xrandr --query | grep "$selected_output" | grep -q "$unique_mode_identifier"; then
+      echo "Mode '$unique_mode_identifier' already exists for output '$selected_output', skipping.."
+      continue
+    fi
+
+    # Add the new mode for the specific output (using the unique settings variable)
+    if xrandr --newmode $unique_modeline_settings; then
+      echo "Successfully added mode '$unique_mode_identifier' for output '$selected_output'"
+
+      # Configure the output to use the new mode
+      if xrandr --addmode "$selected_output" "$unique_mode_identifier" && \
+         xrandr --output "$selected_output" --primary --mode "$unique_mode_identifier"; then
+        echo "Successfully configured output '$selected_output' to use mode '$unique_mode_identifier'"
+        configured_output="$selected_output"; break
+      fi
+    fi
+
+    echo "Failed to configure output '$selected_output' to use mode '$unique_mode_identifier', trying the next output.."
+  done
+  # Fail only when no output accepted the mode; success on the last (or only) output must not abort, and an empty output list must
+  if [[ -z "$configured_output" ]]; then
+    echo "Could not configure any output with the desired mode"
+    exit 1
+  fi
+fi
+
+# Make sure gpu-screen-recorder is owned by nestri
+sudo chown nestri:nestri /usr/bin/gpu-screen-recorder
+
 if [[ -z "${SESSION_ID}" ]]; then
   echo "$(date +"[%Y-%m-%d %H:%M:%S]") No stream name was found, did you forget to set the env variable NAME?" && exit 1
 else
-  /usr/bin/gpu-screen-recorder -w screen -c flv -f 60 -a "$(pactl get-default-sink).monitor" | ffmpeg -hide_banner -v quiet -i pipe:0 -c copy -f mp4 -movflags empty_moov+frag_every_frame+separate_moof+omit_tfhd_offset - | /usr/bin/warp --name "${SESSION_ID}" https://fst.so:4443 &
+  /usr/bin/gpu-screen-recorder -v no -w screen -c flv -f "${REFRESH}" -a "$(pactl get-default-sink).monitor" | ffmpeg -hide_banner -v quiet -i pipe:0 -c copy -f mp4 -movflags empty_moov+frag_every_frame+separate_moof+omit_tfhd_offset - | /usr/bin/warp --name "${SESSION_ID}" https://fst.so:4443 &
 fi
 
 openbox-session &