summary | refs | log | tree | commit | diff
path: root/pkgs/development/rocm-modules/6/rocblas/default.nix
diff options
context:
space:
mode:
Diffstat (limited to 'pkgs/development/rocm-modules/6/rocblas/default.nix')
-rw-r--r-- pkgs/development/rocm-modules/6/rocblas/default.nix 122
1 files changed, 81 insertions, 41 deletions
diff --git a/pkgs/development/rocm-modules/6/rocblas/default.nix b/pkgs/development/rocm-modules/6/rocblas/default.nix
index 7c5fbe801b19..c3621933267d 100644
--- a/pkgs/development/rocm-modules/6/rocblas/default.nix
+++ b/pkgs/development/rocm-modules/6/rocblas/default.nix
@@ -14,21 +14,24 @@
gtest,
gfortran,
openmp,
+ git,
amd-blis,
+ zstd,
+ hipblas-common,
+ hipblaslt,
python3Packages,
+ rocm-smi,
buildTensile ? true,
- buildTests ? false,
- buildBenchmarks ? false,
- tensileLogic ? "asm_full",
- tensileCOVersion ? "default",
+ buildTests ? true,
+ buildBenchmarks ? true,
# https://github.com/ROCm/Tensile/issues/1757
# Allows gfx101* users to use rocBLAS normally.
# Turn the below two values to `true` after the fix has been cherry-picked
# into a release. Just backporting that single fix is not enough because it
# depends on some previous commits.
- tensileSepArch ? false,
- tensileLazyLib ? false,
- tensileLibFormat ? "msgpack",
+ tensileSepArch ? true,
+ tensileLazyLib ? true,
+ withHipBlasLt ? true,
# `gfx940`, `gfx941` are not present in this list because they are early
# engineering samples, and all final MI300 hardware are `gfx942`:
# https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2032791130
@@ -37,38 +40,47 @@
# would force all `gfx101*` GPUs to run as `gfx1010`, so `gfx101*` GPUs will
# always try to use `gfx1010` code objects, hence building for `gfx1012` is
# useless: https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2076327152
- gpuTargets ? [
- "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx942;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102"
- ],
+ gpuTargets ? (
+ clr.localGpuTargets or [
+ "gfx900"
+ "gfx906"
+ "gfx908"
+ "gfx90a"
+ "gfx942"
+ "gfx1010"
+ "gfx1030"
+ "gfx1100"
+ "gfx1101"
+ "gfx1102"
+ ]
+ ),
}:
+let
+ gpuTargets' = lib.concatStringsSep ";" gpuTargets;
+in
stdenv.mkDerivation (finalAttrs: {
- pname = "rocblas";
- version = "6.0.2";
+ pname = "rocblas${clr.gpuArchSuffix}";
+ version = "6.3.3";
- outputs =
- [
- "out"
- ]
- ++ lib.optionals buildTests [
- "test"
- ]
- ++ lib.optionals buildBenchmarks [
- "benchmark"
- ];
+ outputs = [
+ "out"
+ ];
src = fetchFromGitHub {
owner = "ROCm";
repo = "rocBLAS";
rev = "rocm-${finalAttrs.version}";
- hash = "sha256-G68d/gvBbTdNx8xR3xY+OkBm5Yxq1NFjxby9BbpOcUk=";
+ hash = "sha256-IYcrVcGH4yZDkFZeNOJPfG0qsPS/WiH0fTSUSdo1BH4=";
};
nativeBuildInputs =
[
cmake
+ # no ninja: it buffers console output, and nix times out after long periods of no output
rocm-cmake
clr
+ git
]
++ lib.optionals buildTensile [
tensile
@@ -77,12 +89,17 @@ stdenv.mkDerivation (finalAttrs: {
buildInputs =
[
python3
+ hipblas-common
+ ]
+ ++ lib.optionals withHipBlasLt [
+ hipblaslt
]
++ lib.optionals buildTensile [
+ zstd
msgpack
libxml2
python3Packages.msgpack
- python3Packages.joblib
+ python3Packages.zstandard
]
++ lib.optionals buildTests [
gtest
@@ -91,38 +108,61 @@ stdenv.mkDerivation (finalAttrs: {
gfortran
openmp
amd-blis
+ rocm-smi
]
++ lib.optionals (buildTensile || buildTests || buildBenchmarks) [
python3Packages.pyyaml
];
+ dontStrip = true;
+ env.CXXFLAGS =
+ "-O3 -DNDEBUG -I${hipblas-common}/include"
+ + lib.optionalString (buildTests || buildBenchmarks) " -I${amd-blis}/include/blis";
+ # Fails to link tests if we don't add amd-blis libs
+ env.LDFLAGS = lib.optionalString (
+ buildTests || buildBenchmarks
+ ) "-Wl,--as-needed -L${amd-blis}/lib -lblis-mt -lcblas";
+ env.TENSILE_ROCM_ASSEMBLER_PATH = "${stdenv.cc}/bin/clang++";
+
cmakeFlags =
[
- (lib.cmakeFeature "CMAKE_C_COMPILER" "hipcc")
- (lib.cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")
+ (lib.cmakeFeature "CMAKE_BUILD_TYPE" "Release")
+ (lib.cmakeBool "CMAKE_VERBOSE_MAKEFILE" true)
+ (lib.cmakeFeature "CMAKE_EXECUTE_PROCESS_COMMAND_ECHO" "STDERR")
+ (lib.cmakeFeature "CMAKE_Fortran_COMPILER" "${lib.getBin gfortran}/bin/gfortran")
+ (lib.cmakeFeature "CMAKE_Fortran_COMPILER_AR" "${lib.getBin gfortran}/bin/ar")
+ (lib.cmakeFeature "CMAKE_Fortran_COMPILER_RANLIB" "${lib.getBin gfortran}/bin/ranlib")
(lib.cmakeFeature "python" "python3")
- (lib.cmakeFeature "AMDGPU_TARGETS" (lib.concatStringsSep ";" gpuTargets))
+ (lib.cmakeFeature "SUPPORTED_TARGETS" gpuTargets')
+ (lib.cmakeFeature "AMDGPU_TARGETS" gpuTargets')
+ (lib.cmakeFeature "GPU_TARGETS" gpuTargets')
(lib.cmakeBool "BUILD_WITH_TENSILE" buildTensile)
(lib.cmakeBool "ROCM_SYMLINK_LIBS" false)
(lib.cmakeFeature "ROCBLAS_TENSILE_LIBRARY_DIR" "lib/rocblas")
+ (lib.cmakeBool "BUILD_WITH_HIPBLASLT" withHipBlasLt)
(lib.cmakeBool "BUILD_CLIENTS_TESTS" buildTests)
(lib.cmakeBool "BUILD_CLIENTS_BENCHMARKS" buildBenchmarks)
- # rocblas header files are not installed unless we set this
- (lib.cmakeFeature "CMAKE_INSTALL_INCLUDEDIR" "include")
+ (lib.cmakeBool "BUILD_CLIENTS_SAMPLES" buildBenchmarks)
+ (lib.cmakeBool "BUILD_OFFLOAD_COMPRESS" true)
+ # Temporarily set variables to work around upstream CMakeLists issue
+ # Can be removed once https://github.com/ROCm/rocm-cmake/issues/121 is fixed
+ "-DCMAKE_INSTALL_BINDIR=bin"
+ "-DCMAKE_INSTALL_INCLUDEDIR=include"
+ "-DCMAKE_INSTALL_LIBDIR=lib"
]
++ lib.optionals buildTensile [
+ "-DCPACK_SET_DESTDIR=OFF"
+ "-DLINK_BLIS=ON"
+ "-DTensile_CODE_OBJECT_VERSION=default"
+ "-DTensile_LOGIC=asm_full"
+ "-DTensile_LIBRARY_FORMAT=msgpack"
(lib.cmakeBool "BUILD_WITH_PIP" false)
- (lib.cmakeFeature "Tensile_LOGIC" tensileLogic)
- (lib.cmakeFeature "Tensile_CODE_OBJECT_VERSION" tensileCOVersion)
(lib.cmakeBool "Tensile_SEPARATE_ARCHITECTURES" tensileSepArch)
(lib.cmakeBool "Tensile_LAZY_LIBRARY_LOADING" tensileLazyLib)
- (lib.cmakeFeature "Tensile_LIBRARY_FORMAT" tensileLibFormat)
- (lib.cmakeBool "Tensile_PRINT_DEBUG" true)
- ]
- ++ lib.optionals (buildTests || buildBenchmarks) [
- (lib.cmakeFeature "CMAKE_CXX_FLAGS" "-I${amd-blis}/include/blis")
];
+ passthru.amdgpu_targets = gpuTargets';
+
patches = [
(fetchpatch {
name = "Extend-rocBLAS-HIP-ISA-compatibility.patch";
@@ -135,14 +175,17 @@ stdenv.mkDerivation (finalAttrs: {
postPatch = ''
substituteInPlace cmake/build-options.cmake \
--replace-fail 'Tensile_CPU_THREADS ""' 'Tensile_CPU_THREADS "$ENV{NIX_BUILD_CORES}"'
+ substituteInPlace CMakeLists.txt \
+ --replace-fail "4.42.0" "4.43.0"
'';
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
- owner = finalAttrs.src.owner;
- repo = finalAttrs.src.repo;
+ inherit (finalAttrs.src) owner;
+ inherit (finalAttrs.src) repo;
};
+ enableParallelBuilding = true;
requiredSystemFeatures = [ "big-parallel" ];
meta = with lib; {
@@ -151,8 +194,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
- broken =
- versions.minor finalAttrs.version != versions.minor stdenv.cc.version
- || versionAtLeast finalAttrs.version "7.0.0";
};
})