diff options
Diffstat (limited to 'pkgs/development/rocm-modules/6/rocblas/default.nix')
| -rw-r--r-- | pkgs/development/rocm-modules/6/rocblas/default.nix | 122 |
1 files changed, 81 insertions, 41 deletions
diff --git a/pkgs/development/rocm-modules/6/rocblas/default.nix b/pkgs/development/rocm-modules/6/rocblas/default.nix index 7c5fbe801b19..c3621933267d 100644 --- a/pkgs/development/rocm-modules/6/rocblas/default.nix +++ b/pkgs/development/rocm-modules/6/rocblas/default.nix @@ -14,21 +14,24 @@ gtest, gfortran, openmp, + git, amd-blis, + zstd, + hipblas-common, + hipblaslt, python3Packages, + rocm-smi, buildTensile ? true, - buildTests ? false, - buildBenchmarks ? false, - tensileLogic ? "asm_full", - tensileCOVersion ? "default", + buildTests ? true, + buildBenchmarks ? true, # https://github.com/ROCm/Tensile/issues/1757 # Allows gfx101* users to use rocBLAS normally. # Turn the below two values to `true` after the fix has been cherry-picked # into a release. Just backporting that single fix is not enough because it # depends on some previous commits. - tensileSepArch ? false, - tensileLazyLib ? false, - tensileLibFormat ? "msgpack", + tensileSepArch ? true, + tensileLazyLib ? true, + withHipBlasLt ? true, # `gfx940`, `gfx941` are not present in this list because they are early # engineering samples, and all final MI300 hardware are `gfx942`: # https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2032791130 @@ -37,38 +40,47 @@ # would force all `gfx101*` GPUs to run as `gfx1010`, so `gfx101*` GPUs will # always try to use `gfx1010` code objects, hence building for `gfx1012` is # useless: https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2076327152 - gpuTargets ? [ - "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx942;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102" - ], + gpuTargets ? ( + clr.localGpuTargets or [ + "gfx900" + "gfx906" + "gfx908" + "gfx90a" + "gfx942" + "gfx1010" + "gfx1030" + "gfx1100" + "gfx1101" + "gfx1102" + ] + ), }: +let + gpuTargets' = lib.concatStringsSep ";" gpuTargets; +in stdenv.mkDerivation (finalAttrs: { - pname = "rocblas"; - version = "6.0.2"; + pname = "rocblas${clr.gpuArchSuffix}"; + version = "6.3.3"; - outputs = - [ - "out" - ] - ++ lib.optionals buildTests [ - "test" - ] - ++ lib.optionals buildBenchmarks [ - "benchmark" - ]; + outputs = [ + "out" + ]; src = fetchFromGitHub { owner = "ROCm"; repo = "rocBLAS"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-G68d/gvBbTdNx8xR3xY+OkBm5Yxq1NFjxby9BbpOcUk="; + hash = "sha256-IYcrVcGH4yZDkFZeNOJPfG0qsPS/WiH0fTSUSdo1BH4="; }; nativeBuildInputs = [ cmake + # no ninja, it buffers console output and nix times out long periods of no output rocm-cmake clr + git ] ++ lib.optionals buildTensile [ tensile @@ -77,12 +89,17 @@ stdenv.mkDerivation (finalAttrs: { buildInputs = [ python3 + hipblas-common + ] + ++ lib.optionals withHipBlasLt [ + hipblaslt ] ++ lib.optionals buildTensile [ + zstd msgpack libxml2 python3Packages.msgpack - python3Packages.joblib + python3Packages.zstandard ] ++ lib.optionals buildTests [ gtest @@ -91,38 +108,61 @@ stdenv.mkDerivation (finalAttrs: { gfortran openmp amd-blis + rocm-smi ] ++ lib.optionals (buildTensile || buildTests || buildBenchmarks) [ python3Packages.pyyaml ]; + dontStrip = true; + env.CXXFLAGS = + "-O3 -DNDEBUG -I${hipblas-common}/include" + + lib.optionalString (buildTests || buildBenchmarks) " -I${amd-blis}/include/blis"; + # Fails to link tests if we don't add amd-blis libs + env.LDFLAGS = lib.optionalString ( + buildTests || buildBenchmarks + ) "-Wl,--as-needed -L${amd-blis}/lib -lblis-mt -lcblas"; + env.TENSILE_ROCM_ASSEMBLER_PATH = "${stdenv.cc}/bin/clang++"; + cmakeFlags = [ - (lib.cmakeFeature "CMAKE_C_COMPILER" "hipcc") - (lib.cmakeFeature "CMAKE_CXX_COMPILER" "hipcc") + (lib.cmakeFeature "CMAKE_BUILD_TYPE" "Release") + (lib.cmakeBool "CMAKE_VERBOSE_MAKEFILE" true) + (lib.cmakeFeature "CMAKE_EXECUTE_PROCESS_COMMAND_ECHO" "STDERR") + (lib.cmakeFeature "CMAKE_Fortran_COMPILER" "${lib.getBin gfortran}/bin/gfortran") + (lib.cmakeFeature "CMAKE_Fortran_COMPILER_AR" "${lib.getBin gfortran}/bin/ar") + (lib.cmakeFeature "CMAKE_Fortran_COMPILER_RANLIB" "${lib.getBin gfortran}/bin/ranlib") (lib.cmakeFeature "python" "python3") - (lib.cmakeFeature "AMDGPU_TARGETS" (lib.concatStringsSep ";" gpuTargets)) + (lib.cmakeFeature "SUPPORTED_TARGETS" gpuTargets') + (lib.cmakeFeature "AMDGPU_TARGETS" gpuTargets') + (lib.cmakeFeature "GPU_TARGETS" gpuTargets') (lib.cmakeBool "BUILD_WITH_TENSILE" buildTensile) (lib.cmakeBool "ROCM_SYMLINK_LIBS" false) (lib.cmakeFeature "ROCBLAS_TENSILE_LIBRARY_DIR" "lib/rocblas") + (lib.cmakeBool "BUILD_WITH_HIPBLASLT" withHipBlasLt) (lib.cmakeBool "BUILD_CLIENTS_TESTS" buildTests) (lib.cmakeBool "BUILD_CLIENTS_BENCHMARKS" buildBenchmarks) - # rocblas header files are not installed unless we set this - (lib.cmakeFeature "CMAKE_INSTALL_INCLUDEDIR" "include") + (lib.cmakeBool "BUILD_CLIENTS_SAMPLES" buildBenchmarks) + (lib.cmakeBool "BUILD_OFFLOAD_COMPRESS" true) + # Temporarily set variables to work around upstream CMakeLists issue + # Can be removed once https://github.com/ROCm/rocm-cmake/issues/121 is fixed + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + "-DCMAKE_INSTALL_LIBDIR=lib" ] ++ lib.optionals buildTensile [ + "-DCPACK_SET_DESTDIR=OFF" + "-DLINK_BLIS=ON" + "-DTensile_CODE_OBJECT_VERSION=default" + "-DTensile_LOGIC=asm_full" + "-DTensile_LIBRARY_FORMAT=msgpack" (lib.cmakeBool "BUILD_WITH_PIP" false) - (lib.cmakeFeature "Tensile_LOGIC" tensileLogic) - (lib.cmakeFeature "Tensile_CODE_OBJECT_VERSION" tensileCOVersion) (lib.cmakeBool "Tensile_SEPARATE_ARCHITECTURES" tensileSepArch) (lib.cmakeBool "Tensile_LAZY_LIBRARY_LOADING" tensileLazyLib) - (lib.cmakeFeature "Tensile_LIBRARY_FORMAT" tensileLibFormat) - (lib.cmakeBool "Tensile_PRINT_DEBUG" true) - ] - ++ lib.optionals (buildTests || buildBenchmarks) [ - (lib.cmakeFeature "CMAKE_CXX_FLAGS" "-I${amd-blis}/include/blis") ]; + passthru.amdgpu_targets = gpuTargets'; + patches = [ (fetchpatch { name = "Extend-rocBLAS-HIP-ISA-compatibility.patch"; @@ -135,14 +175,17 @@ stdenv.mkDerivation (finalAttrs: { postPatch = '' substituteInPlace cmake/build-options.cmake \ --replace-fail 'Tensile_CPU_THREADS ""' 'Tensile_CPU_THREADS "$ENV{NIX_BUILD_CORES}"' + substituteInPlace CMakeLists.txt \ + --replace-fail "4.42.0" "4.43.0" ''; passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; + inherit (finalAttrs.src) owner; + inherit (finalAttrs.src) repo; }; + enableParallelBuilding = true; requiredSystemFeatures = [ "big-parallel" ]; meta = with lib; { @@ -151,8 +194,5 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = - versions.minor finalAttrs.version != versions.minor stdenv.cc.version - || versionAtLeast finalAttrs.version "7.0.0"; }; }) |
