From d2c2e000a77d1d0f59a311a1e0ad49f8498d787a Mon Sep 17 00:00:00 2001
From: kolamsrinivas <67026525+kolamsrinivas@users.noreply.github.com>
Date: Wed, 15 Jul 2020 16:45:22 -0700
Subject: changes to py-torch recipe to enable rocm build (#17410)

* changes to recipe to enable rocm build

* fixing flake8 issue

* addressed the review comment
---
 .../repos/builtin/packages/py-torch/package.py | 15 +++-
 .../repos/builtin/packages/py-torch/rocm.patch | 98 ++++++++++++++++++++++
 2 files changed, 112 insertions(+), 1 deletion(-)
 create mode 100644 var/spack/repos/builtin/packages/py-torch/rocm.patch

diff --git a/var/spack/repos/builtin/packages/py-torch/package.py b/var/spack/repos/builtin/packages/py-torch/package.py
index 1ffeb81b23..bebf83d3d6 100644
--- a/var/spack/repos/builtin/packages/py-torch/package.py
+++ b/var/spack/repos/builtin/packages/py-torch/package.py
@@ -4,6 +4,7 @@
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 
 from spack import *
+import os
 
 
 class PyTorch(PythonPackage, CudaPackage):
@@ -68,6 +69,7 @@ class PyTorch(PythonPackage, CudaPackage):
     variant('cuda', default=True, description='Build with CUDA')
     variant('cudnn', default=True, description='Enables the cuDNN build')
+    variant('rocm', default=False, description='Build with ROCm')
     variant('magma', default=False, description='Enables the MAGMA build')
     variant('fbgemm', default=False, description='Enables the FBGEMM build')
     variant('test', default=False, description='Enables the test build')
@@ -112,6 +114,7 @@ class PyTorch(PythonPackage, CudaPackage):
     conflicts('cuda_arch=none', when='+cuda',
               msg='Must specify CUDA compute capabilities of your GPU, see '
               'https://developer.nvidia.com/cuda-gpus')
+    conflicts('+rocm', when='+cuda')
 
     # Required dependencies
     depends_on('cmake@3.5:', type='build')
@@ -173,6 +176,9 @@ class PyTorch(PythonPackage, CudaPackage):
     # Fixes CMake configuration error when XNNPACK is disabled
     patch('xnnpack.patch', when='@1.5.0:1.5.999')
 
+    # Fixes build error when ROCm is enabled for the pytorch-1.5 release
+    patch('rocm.patch', when='@1.5.0:1.5.999+rocm')
+
     # https://github.com/pytorch/pytorch/pull/37086
     # Fixes compilation with Clang 9.0.0 and Apple Clang 11.0.3
     patch('https://github.com/pytorch/pytorch/commit/e921cd222a8fbeabf5a3e74e83e0d8dfb01aa8b5.patch',
@@ -244,7 +250,9 @@ class PyTorch(PythonPackage, CudaPackage):
 
         enable_or_disable('fbgemm')
         enable_or_disable('test', keyword='BUILD')
-
+        enable_or_disable('rocm')
+        if '+rocm' in self.spec:
+            env.set('USE_MKLDNN', 0)
         if '+miopen' in self.spec:
             env.set('MIOPEN_LIB_DIR', self.spec['miopen'].libs.directories[0])
             env.set('MIOPEN_INCLUDE_DIR', self.spec['miopen'].prefix.include)
@@ -297,6 +305,11 @@ class PyTorch(PythonPackage, CudaPackage):
         enable_or_disable('zstd', newer=True)
         enable_or_disable('tbb', newer=True)
 
+    @run_before('install')
+    def build_amd(self):
+        if '+rocm' in self.spec:
+            python(os.path.join('tools', 'amd_build', 'build_amd.py'))
+
     def install_test(self):
         with working_dir('test'):
             python('run_test.py')

diff --git a/var/spack/repos/builtin/packages/py-torch/rocm.patch b/var/spack/repos/builtin/packages/py-torch/rocm.patch
new file mode 100644
index 0000000000..b50cc7e159
--- /dev/null
+++ b/var/spack/repos/builtin/packages/py-torch/rocm.patch
@@ -0,0 +1,98 @@
+diff --git a/aten/src/ATen/cuda/nvrtc_stub/ATenNVRTC.h b/aten/src/ATen/cuda/nvrtc_stub/ATenNVRTC.h
+index 9cd678dfb4cc7..4630465115c7c 100644
+--- a/aten/src/ATen/cuda/nvrtc_stub/ATenNVRTC.h
++++ b/aten/src/ATen/cuda/nvrtc_stub/ATenNVRTC.h
+@@ -67,6 +67,14 @@ namespace at { namespace cuda {
+ //
+ // HIP doesn't have
+ // cuGetErrorString (maps to non-functional hipGetErrorString___)
++//
++// HIP from ROCm 3.5 on renamed hipOccupancyMaxActiveBlocksPerMultiprocessor
++// to hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.
++#if HIP_VERSION < 305
++#define HIPOCCUPANCYMAXACTIVEBLOCKSPERMULTIPROCESSOR hipOccupancyMaxActiveBlocksPerMultiprocessor
++#else
++#define HIPOCCUPANCYMAXACTIVEBLOCKSPERMULTIPROCESSOR cuOccupancyMaxActiveBlocksPerMultiprocessor
++#endif
+ 
+ #define AT_FORALL_NVRTC(_) \
+   _(nvrtcVersion) \
+@@ -76,7 +84,7 @@ namespace at { namespace cuda {
+   _(nvrtcGetPTX) \
+   _(cuModuleLoadData) \
+   _(cuModuleGetFunction) \
+-  _(cuOccupancyMaxActiveBlocksPerMultiprocessor) \
++  _(HIPOCCUPANCYMAXACTIVEBLOCKSPERMULTIPROCESSOR)\
+   _(nvrtcGetErrorString) \
+   _(nvrtcGetProgramLogSize) \
+   _(nvrtcGetProgramLog) \
+diff --git a/aten/src/ATen/native/cuda/SoftMax.cu b/aten/src/ATen/native/cuda/SoftMax.cu
+index da1995123ecfc..f935eb4ef3d0e 100644
+--- a/aten/src/ATen/native/cuda/SoftMax.cu
++++ b/aten/src/ATen/native/cuda/SoftMax.cu
+@@ -127,8 +127,8 @@ void SpatialSoftMax_getLaunchSizes(
+   uint32_t block_threads = block.x * block.y;
+   smem_size = block.x == 1 ? 0 : block_threads * sizeof(accscalar_t);
+   int max_active_blocks;
+-#ifdef __HIP_PLATFORM_HCC__
+-  // XXX HIP function signature is not compatible yet.
++#if defined(__HIP_PLATFORM_HCC__) && HIP_VERSION < 305
++  // HIP function signature is not compatible yet.
+   uint32_t max_blocks;
+   cudaOccupancyMaxActiveBlocksPerMultiprocessor(&max_blocks,
+                                                 k, block_threads, smem_size);
+diff --git a/torch/csrc/jit/codegen/fuser/cuda/fused_kernel.cpp b/torch/csrc/jit/codegen/fuser/cuda/fused_kernel.cpp
+index 5586e49919727..27315ee475277 100644
+--- a/torch/csrc/jit/codegen/fuser/cuda/fused_kernel.cpp
++++ b/torch/csrc/jit/codegen/fuser/cuda/fused_kernel.cpp
+@@ -140,10 +140,10 @@ FusedKernelCUDA::FusedKernelCUDA(
+       nvrtc().cuModuleGetFunction(&function_, module_, name_.c_str()));
+ 
+   // Computes max blocks
+-#ifdef __HIP_PLATFORM_HCC__
+-  // XXX HIP function signature is not compatible yet
++#if defined(__HIP_PLATFORM_HCC__) && HIP_VERSION < 305
++  // HIP function signature is not compatible yet
+   uint32_t max_blocks;
+-  AT_CUDA_DRIVER_CHECK(nvrtc().cuOccupancyMaxActiveBlocksPerMultiprocessor(
++  AT_CUDA_DRIVER_CHECK(nvrtc().hipOccupancyMaxActiveBlocksPerMultiprocessor(
+       &max_blocks, function_, 128, 0));
+   maxBlocks_ = max_blocks;
+ #else
+diff --git a/torch/utils/hipify/cuda_to_hip_mappings.py b/torch/utils/hipify/cuda_to_hip_mappings.py
+index 7e21363cbe6af..26f269d92ae38 100644
+--- a/torch/utils/hipify/cuda_to_hip_mappings.py
++++ b/torch/utils/hipify/cuda_to_hip_mappings.py
+@@ -2890,7 +2890,7 @@
+     (
+         "cuOccupancyMaxActiveBlocksPerMultiprocessor",
+         (
+-            "hipOccupancyMaxActiveBlocksPerMultiprocessor",
++            "hipModuleOccupancyMaxActiveBlocksPerMultiprocessor",
+             CONV_OCCUPANCY,
+             API_DRIVER,
+         ),
+@@ -2898,7 +2898,7 @@
+     (
+         "cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags",
+         (
+-            "hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags",
++            "hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags",
+             CONV_OCCUPANCY,
+             API_DRIVER,
+             HIP_UNSUPPORTED,
+@@ -2906,12 +2906,12 @@
+     ),
+     (
+         "cuOccupancyMaxPotentialBlockSize",
+-        ("hipOccupancyMaxPotentialBlockSize", CONV_OCCUPANCY, API_DRIVER),
++        ("hipModuleOccupancyMaxPotentialBlockSize", CONV_OCCUPANCY, API_DRIVER),
+     ),
+     (
+         "cuOccupancyMaxPotentialBlockSizeWithFlags",
+         (
+-            "hipOccupancyMaxPotentialBlockSizeWithFlags",
"hipModuleOccupancyMaxPotentialBlockSizeWithFlags", + CONV_OCCUPANCY, + API_DRIVER, + HIP_UNSUPPORTED, -- cgit v1.2.3-60-g2f50