Compare commits
8 Commits
old-master
...
ba98023645
| Author | SHA1 | Date | |
|---|---|---|---|
| ba98023645 | |||
| 7188f06111 | |||
| 850f5f6ad6 | |||
| 95ba4f30ab | |||
| 93cc24a40b | |||
| 06779ba55e | |||
| 4df710ff97 | |||
| c2a433216a |
@@ -4,6 +4,7 @@
|
|||||||
imports = [
|
imports = [
|
||||||
../common/base.nix
|
../common/base.nix
|
||||||
../common/xeon/console.nix
|
../common/xeon/console.nix
|
||||||
|
../module/amd-uprof.nix
|
||||||
../module/emulation.nix
|
../module/emulation.nix
|
||||||
../module/nvidia.nix
|
../module/nvidia.nix
|
||||||
../module/slurm-client.nix
|
../module/slurm-client.nix
|
||||||
@@ -29,12 +30,17 @@
|
|||||||
# Use performance for benchmarks
|
# Use performance for benchmarks
|
||||||
powerManagement.cpuFreqGovernor = "performance";
|
powerManagement.cpuFreqGovernor = "performance";
|
||||||
|
|
||||||
|
services.amd-uprof.enable = true;
|
||||||
|
|
||||||
# Disable NUMA balancing
|
# Disable NUMA balancing
|
||||||
boot.kernel.sysctl."kernel.numa_balancing" = 0;
|
boot.kernel.sysctl."kernel.numa_balancing" = 0;
|
||||||
|
|
||||||
# Expose kernel addresses
|
# Expose kernel addresses
|
||||||
boot.kernel.sysctl."kernel.kptr_restrict" = 0;
|
boot.kernel.sysctl."kernel.kptr_restrict" = 0;
|
||||||
|
|
||||||
|
# Disable NMI watchdog to save one hw counter (for AMD uProf)
|
||||||
|
boot.kernel.sysctl."kernel.nmi_watchdog" = 0;
|
||||||
|
|
||||||
services.openssh.settings.X11Forwarding = true;
|
services.openssh.settings.X11Forwarding = true;
|
||||||
|
|
||||||
services.fail2ban.enable = true;
|
services.fail2ban.enable = true;
|
||||||
|
|||||||
49
m/module/amd-uprof.nix
Normal file
49
m/module/amd-uprof.nix
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
# NixOS module for AMD uProf. It declares the `services.amd-uprof.enable`
# option and, when that option is set, installs the kernel driver and the
# userspace tools and creates the /dev/AMDPowerProfiler device node.
{ config, lib, pkgs, ... }:

let
  cfg = config.services.amd-uprof;
  kernelPkgs = config.boot.kernelPackages;

  # Creates the character device node. The value read from
  # /proc/AMDPowerProfiler/device is passed as the major number and the minor
  # number is 0.
  createDevice = pkgs.writeShellScript "add-amd-uprof-dev.sh" ''
    mknod /dev/AMDPowerProfiler -m 666 c $(< /proc/AMDPowerProfiler/device) 0
  '';

  # Deletes the device node again when the unit is stopped.
  removeDevice = pkgs.writeShellScript "remove-amd-uprof-dev.sh" ''
    rm -f /dev/AMDPowerProfiler
  '';
in
{
  options.services.amd-uprof.enable = lib.mkOption {
    type = lib.types.bool;
    default = false;
    description = "Whether to enable AMD uProf.";
  };

  # Nothing below is applied unless the option is enabled.
  config = lib.mkIf cfg.enable {
    boot = {
      # Take the driver from the package set of the configured kernel, so the
      # module matches the kernel version of this configuration, and load it
      # at boot.
      extraModulePackages = [ kernelPkgs.amd-uprof-driver ];
      kernelModules = [ "AMDPowerProfiler" ];
    };

    # Put the userspace tools in $PATH.
    environment.systemPackages = [ pkgs.amd-uprof ];

    # The AMDPowerProfiler module neither creates the /dev device nor emits
    # any uevents, so udev rules cannot create the device automatically.
    # Instead, a systemd unit creates it after the modules have been loaded.
    systemd.services.amd-uprof-device = {
      description = "Create /dev/AMDPowerProfiler device";
      wantedBy = [ "multi-user.target" ];
      after = [ "systemd-modules-load.service" ];

      # Run only when the driver's /proc entry is present and the device node
      # does not exist yet.
      unitConfig.ConditionPathExists = [
        "/proc/AMDPowerProfiler/device"
        "!/dev/AMDPowerProfiler"
      ];

      serviceConfig = {
        Type = "oneshot";
        RemainAfterExit = true;
        ExecStart = createDevice;
        ExecStop = removeDevice;
      };
    };
  };
}
|
||||||
82
pkgs/amd-uprof/default.nix
Normal file
82
pkgs/amd-uprof/default.nix
Normal file
@@ -0,0 +1,82 @@
|
|||||||
|
# Package for the AMD uProf userspace tools. The vendor tarball is fetched
# with curl in a fixed-output derivation, unpacked into $out, and its prebuilt
# binaries are fixed up by autoPatchelfHook against the libraries listed in
# buildInputs.
{ stdenv
, lib
, curl
, cacert
, runCommandLocal
, autoPatchelfHook
, elfutils
, glib
, libGL
, ncurses5
, xorg
, zlib
, libxkbcommon
, freetype
, fontconfig
, libGLU
, dbus
, rocmPackages
, libxcrypt-legacy
}:

let
  version = "5.1.701";
  tarball = "AMDuProf_Linux_x64_${version}.tar.bz2";

  # Fixed-output derivation that downloads the tarball. The request carries
  # browser-like headers.
  # NOTE(review): presumably the download server rejects plain curl requests;
  # confirm before removing any of the headers.
  uprofSrc = runCommandLocal tarball {
    nativeBuildInputs = [ curl ];
    outputHash = "sha256-j9gxcBcIg6Zhc5FglUXf/VV9bKSo+PAKeootbN7ggYk=";
    SSL_CERT_FILE="${cacert}/etc/ssl/certs/ca-bundle.crt";
  } ''
    # --fail makes curl exit with an error on an HTTP error status instead of
    # writing the server's error page into $out, where it would only surface
    # as a confusing hash mismatch.
    # The trailing tr turns the carriage returns of curl's progress meter
    # into newlines so the progress is readable in the build log.
    curl \
      --fail \
      -o $out \
      'https://download.amd.com/developer/eula/uprof/uprof-5-1/${tarball}' \
      -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:139.0) Gecko/20100101 Firefox/139.0' \
      -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' \
      -H 'Accept-Language: en-US,en;q=0.5' \
      -H 'Accept-Encoding: gzip, deflate, br, zstd' \
      -H 'Referer: https://www.amd.com/' 2>&1 | tr '\r' '\n'
  '';

in
  stdenv.mkDerivation {
    pname = "AMD-uProf";
    inherit version;
    src = uprofSrc;
    dontStrip = true;

    # Only install and fix up: the tarball is extracted directly in
    # installPhase and nothing is compiled.
    phases = [ "installPhase" "fixupPhase" ];

    nativeBuildInputs = [ autoPatchelfHook ];

    # Libraries made available to autoPatchelfHook for resolving the
    # dependencies of the prebuilt binaries.
    buildInputs = [
      stdenv.cc.cc.lib
      ncurses5
      elfutils
      glib
      libGL
      libGLU
      libxcrypt-legacy
      xorg.libX11
      xorg.libXext
      xorg.libXi
      xorg.libXmu
      xorg.libxcb
      xorg.xcbutilwm
      xorg.xcbutilrenderutil
      xorg.xcbutilkeysyms
      xorg.xcbutilimage
      fontconfig.lib
      libxkbcommon
      zlib
      freetype
      dbus
      rocmPackages.rocprofiler
    ];

    installPhase = ''
      set -x
      mkdir -p $out
      # Unpack the tarball into $out, dropping its top-level directory.
      tar -x -v -C $out --strip-components=1 -f $src
      # Remove the bundled kernel driver source tarball from the output.
      rm $out/bin/AMDPowerProfilerDriverSource.tar.gz
      # Point the GPU agent at the unversioned libroctracer64.so soname.
      patchelf --replace-needed libroctracer64.so.1 libroctracer64.so $out/bin/ProfileAgents/x64/libAMDGpuAgent.so
      # Declare libcrypt and libstdc++ as needed libraries of AMDuProfSys.
      # NOTE(review): presumably it loads them without listing them; confirm.
      patchelf --add-needed libcrypt.so.1 --add-needed libstdc++.so.6 $out/bin/AMDuProfSys
      set +x
    '';
  }
|
||||||
35
pkgs/amd-uprof/driver.nix
Normal file
35
pkgs/amd-uprof/driver.nix
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
# Package for the AMDPowerProfiler kernel module. The driver source tarball
# is taken out of the amd-uprof source archive and built against the given
# kernel, with two local patches applied.
#
# `curl` and `cacert` are not used in this expression; they are kept in the
# argument list so existing callers and overrides keep working.
{ stdenv
, lib
, amd-uprof
, curl
, cacert
, kernel
, runCommandLocal
}:

let
  # Reuse the version and the downloaded archive of the userspace package so
  # that driver and tools always come from the same release.
  version = amd-uprof.version;
  tarball = amd-uprof.src;

in
  stdenv.mkDerivation {
    pname = "AMDPowerProfilerDriver";
    inherit version;

    # Extract only the nested driver source tarball from the uProf archive
    # and use that tarball as this derivation's source.
    src = runCommandLocal "AMDPowerProfilerDriverSource.tar.gz" { } ''
      set -x
      tar -x -f ${tarball} AMDuProf_Linux_x64_${version}/bin/AMDPowerProfilerDriverSource.tar.gz
      mv AMDuProf_Linux_x64_${version}/bin/AMDPowerProfilerDriverSource.tar.gz $out
      set +x
    '';

    hardeningDisable = [ "pic" "format" ];
    nativeBuildInputs = kernel.moduleBuildDependencies;
    patches = [ ./makefile.patch ./hrtimer.patch ];

    # Variables passed to the Makefile: the kernel version, the kernel build
    # tree and the installation prefix ($(out) is expanded by make).
    makeFlags = [
      "KERNEL_VERSION=${kernel.modDirVersion}"
      "KERNEL_DIR=${kernel.dev}/lib/modules/${kernel.modDirVersion}/build"
      "INSTALL_MOD_PATH=$(out)"
    ];

    meta = {
      description = "AMD Power Profiler Driver";
      # English product page, the same URL the README links to.
      homepage = "https://www.amd.com/en/developer/uprof.html";
      platforms = lib.platforms.linux;
    };
  }
|
||||||
31
pkgs/amd-uprof/hrtimer.patch
Normal file
31
pkgs/amd-uprof/hrtimer.patch
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
--- a/src/PmcTimerConfig.c 2025-09-04 12:17:16.771707049 +0200
|
||||||
|
+++ b/src/PmcTimerConfig.c 2025-09-04 12:17:04.878515468 +0200
|
||||||
|
@@ -99,7 +99,7 @@ static void PmcInitTimer(void* pInfo)
|
||||||
|
|
||||||
|
DRVPRINT("pTimerConfig(%p)", pTimerConfig);
|
||||||
|
|
||||||
|
- hrtimer_init(&pTimerConfig->m_hrTimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
|
||||||
|
+ hrtimer_setup(&pTimerConfig->m_hrTimer, PmcTimerCallback, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
|
||||||
|
}
|
||||||
|
|
||||||
|
int PmcSetupTimer(ClientContext* pClientCtx)
|
||||||
|
@@ -157,7 +157,6 @@ int PmcSetupTimer(ClientContext* pClient
|
||||||
|
{
|
||||||
|
/* Interval in ms */
|
||||||
|
pTimerConfig->m_time = ktime_set(interval / 1000, interval * 1000000);
|
||||||
|
- pTimerConfig->m_hrTimer.function = PmcTimerCallback;
|
||||||
|
|
||||||
|
DRVPRINT("retVal(%d) m_time(%lld)", retVal, (long long int) pTimerConfig->m_time);
|
||||||
|
}
|
||||||
|
--- a/src/PwrProfTimer.c 2025-09-04 12:18:08.750544327 +0200
|
||||||
|
+++ b/src/PwrProfTimer.c 2025-09-04 12:18:28.557863382 +0200
|
||||||
|
@@ -573,8 +573,7 @@ void InitHrTimer(uint32 cpu)
|
||||||
|
pCoreClientData = &per_cpu(g_coreClientData, cpu);
|
||||||
|
|
||||||
|
// initialize HR timer
|
||||||
|
- hrtimer_init(&pCoreClientData->m_hrTimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
|
||||||
|
- pCoreClientData->m_hrTimer.function = &HrTimerCallback;
|
||||||
|
+ hrtimer_setup(&pCoreClientData->m_hrTimer, &HrTimerCallback, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
|
||||||
|
|
||||||
|
return;
|
||||||
|
} // InitHrTimer
|
||||||
66
pkgs/amd-uprof/makefile.patch
Normal file
66
pkgs/amd-uprof/makefile.patch
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
--- a/Makefile 2025-06-19 20:36:49.346693267 +0200
|
||||||
|
+++ b/Makefile 2025-06-19 20:42:29.778088660 +0200
|
||||||
|
@@ -27,7 +27,7 @@ MODULE_VERSION=$(shell cat AMDPowerProfi
|
||||||
|
MODULE_NAME_KO=$(MODULE_NAME).ko
|
||||||
|
|
||||||
|
# check is module inserted
|
||||||
|
-MODPROBE_OUTPUT=$(shell lsmod | grep $(MODULE_NAME))
|
||||||
|
+#MODPROBE_OUTPUT=$(shell lsmod | grep $(MODULE_NAME))
|
||||||
|
|
||||||
|
# check pcore dkms status
|
||||||
|
PCORE_DKMS_STATUS=$(shell dkms status | grep $(MODULE_NAME) | grep $(MODULE_VERSION))
|
||||||
|
@@ -50,7 +50,7 @@ endif
|
||||||
|
# “-Wno-missing-attributes” is added for GCC version >= 9.0 and kernel version <= 5.00
|
||||||
|
G_VERSION=9
|
||||||
|
K_VERSION=5
|
||||||
|
-KERNEL_MAJOR_VERSION=$(shell uname -r | cut -f1 -d.)
|
||||||
|
+KERNEL_MAJOR_VERSION=$(shell echo "$(KERNEL_VERSION)" | cut -f1 -d.)
|
||||||
|
GCCVERSION = $(shell gcc -dumpversion | cut -f1 -d.)
|
||||||
|
ifeq ($(G_VERSION),$(firstword $(sort $(GCCVERSION) $(G_VERSION))))
|
||||||
|
ifeq ($(K_VERSION),$(lastword $(sort $(KERNEL_MAJOR_VERSION) $(K_VERSION))))
|
||||||
|
@@ -66,17 +66,7 @@ ${MODULE_NAME}-objs := src/PmcDataBuffe
|
||||||
|
|
||||||
|
# make
|
||||||
|
all:
|
||||||
|
- @chmod a+x ./AMDPPcert.sh
|
||||||
|
- @./AMDPPcert.sh 0 1; echo $$? > $(PWD)/sign_status;
|
||||||
|
- @SIGSTATUS1=`cat $(PWD)/sign_status | tr -d '\n'`; \
|
||||||
|
- if [ $$SIGSTATUS1 -eq 1 ]; then \
|
||||||
|
- exit 1; \
|
||||||
|
- fi
|
||||||
|
- @make -C /lib/modules/$(KERNEL_VERSION)/build M=$(PWD) $(MAKE_OPTS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" modules
|
||||||
|
- @SIGSTATUS3=`cat $(PWD)/sign_status | tr -d '\n'`; \
|
||||||
|
- if [ $$SIGSTATUS3 -eq 0 ]; then \
|
||||||
|
- ./AMDPPcert.sh 1 $(MODULE_NAME_KO); \
|
||||||
|
- fi
|
||||||
|
+ make -C $(KERNEL_DIR) M=$(PWD) $(MAKE_OPTS) CFLAGS_MODULE="$(EXTRA_CFLAGS)" modules
|
||||||
|
|
||||||
|
# make clean
|
||||||
|
clean:
|
||||||
|
@@ -84,23 +74,9 @@ clean:
|
||||||
|
|
||||||
|
# make install
|
||||||
|
install:
|
||||||
|
- @mkdir -p /lib/modules/`uname -r`/kernel/drivers/extra
|
||||||
|
- @rm -f /lib/modules/`uname -r`/kernel/drivers/extra/$(MODULE_NAME_KO)
|
||||||
|
- @cp $(MODULE_NAME_KO) /lib/modules/`uname -r`/kernel/drivers/extra/
|
||||||
|
- @depmod -a
|
||||||
|
- @if [ ! -z "$(MODPROBE_OUTPUT)" ]; then \
|
||||||
|
- echo "Uninstalling AMDPowerProfiler Linux kernel module.";\
|
||||||
|
- rmmod $(MODULE_NAME);\
|
||||||
|
- fi
|
||||||
|
- @modprobe $(MODULE_NAME) 2> $(PWD)/sign_status1; \
|
||||||
|
- cat $(PWD)/sign_status1 | grep "Key was rejected by service"; \
|
||||||
|
- echo $$? > $(PWD)/sign_status; SIGSTATUS1=`cat $(PWD)/sign_status | tr -d '\n'`; \
|
||||||
|
- if [ $$SIGSTATUS1 -eq 0 ]; then \
|
||||||
|
- echo "ERROR: Secure Boot enabled, correct key is not yet enrolled in BIOS key table"; \
|
||||||
|
- exit 1; \
|
||||||
|
- else \
|
||||||
|
- cat $(PWD)/sign_status1; \
|
||||||
|
- fi
|
||||||
|
+ mkdir -p $(INSTALL_MOD_PATH)/lib/modules/$(KERNEL_VERSION)/kernel/drivers/extra/
|
||||||
|
+ cp -a $(MODULE_NAME_KO) $(INSTALL_MOD_PATH)/lib/modules/$(KERNEL_VERSION)/kernel/drivers/extra/
|
||||||
|
+
|
||||||
|
# make dkms
|
||||||
|
dkms:
|
||||||
|
@chmod a+x ./AMDPPcert.sh
|
||||||
@@ -53,4 +53,15 @@ final: prev:
|
|||||||
meteocat-exporter = prev.callPackage ./meteocat-exporter/default.nix { };
|
meteocat-exporter = prev.callPackage ./meteocat-exporter/default.nix { };
|
||||||
upc-qaire-exporter = prev.callPackage ./upc-qaire-exporter/default.nix { };
|
upc-qaire-exporter = prev.callPackage ./upc-qaire-exporter/default.nix { };
|
||||||
cudainfo = prev.callPackage ./cudainfo/default.nix { };
|
cudainfo = prev.callPackage ./cudainfo/default.nix { };
|
||||||
|
|
||||||
|
amd-uprof = prev.callPackage ./amd-uprof/default.nix { };
|
||||||
|
|
||||||
|
# FIXME: Extend this to all linuxPackages variants. Open problem, see:
|
||||||
|
# https://discourse.nixos.org/t/whats-the-right-way-to-make-a-custom-kernel-module-available/4636
|
||||||
|
linuxPackages = prev.linuxPackages.extend (_final: _prev: {
|
||||||
|
amd-uprof-driver = _prev.callPackage ./amd-uprof/driver.nix { };
|
||||||
|
});
|
||||||
|
linuxPackages_latest = prev.linuxPackages_latest.extend(_final: _prev: {
|
||||||
|
amd-uprof-driver = _prev.callPackage ./amd-uprof/driver.nix { };
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -96,6 +96,15 @@ Then just run `nix develop` from the same directory:
|
|||||||
Cuda compilation tools, release 12.4, V12.4.99
|
Cuda compilation tools, release 12.4, V12.4.99
|
||||||
Build cuda_12.4.r12.4/compiler.33961263_0
|
Build cuda_12.4.r12.4/compiler.33961263_0
|
||||||
|
|
||||||
|
## AMD uProf
|
||||||
|
|
||||||
|
The [AMD uProf](https://www.amd.com/en/developer/uprof.html) performance
|
||||||
|
analysis tool-suite is installed and ready to use.
|
||||||
|
|
||||||
|
See the [AMD uProf user guide](https://docs.amd.com/r/en-US/57368-uProf-user-guide)
|
||||||
|
for more details on how to use the tools. To use the GUI, make sure that you
|
||||||
|
connect to fox using X11 forwarding.
|
||||||
|
|
||||||
## Filesystems
|
## Filesystems
|
||||||
|
|
||||||
The machine has several file systems available.
|
The machine has several file systems available.
|
||||||
|
|||||||
Reference in New Issue
Block a user