From 108a0c99f15acceb0c1302cbdbc370bb2a0d75fa Mon Sep 17 00:00:00 2001 From: Andrew W Elble Date: Fri, 27 Mar 2020 11:51:04 -0400 Subject: TensorFlow: Make horovod compilation work (#15692) * TensorFlow: Clean up/simplify the installation, make sure the headers are installed so that horovod can find them successfully. Fix the 2.0.* builds. * Backport of 837c8b6b upstream "Remove contrib cloud bigtable and storage ops/kernels." Allows 2.0.* releases to build with '--config=nogcp' * comment regarding tensorflow issue #31187 Co-authored-by: Andrew W Elble --- ...rib-cloud-bigtable-and-storage-ops-kernel.patch | 76 ++++++++++++++++++++++ .../builtin/packages/py-tensorflow/package.py | 44 +++++++------ 2 files changed, 100 insertions(+), 20 deletions(-) create mode 100644 var/spack/repos/builtin/packages/py-tensorflow/0001-Remove-contrib-cloud-bigtable-and-storage-ops-kernel.patch diff --git a/var/spack/repos/builtin/packages/py-tensorflow/0001-Remove-contrib-cloud-bigtable-and-storage-ops-kernel.patch b/var/spack/repos/builtin/packages/py-tensorflow/0001-Remove-contrib-cloud-bigtable-and-storage-ops-kernel.patch new file mode 100644 index 0000000000..bb742ff1b4 --- /dev/null +++ b/var/spack/repos/builtin/packages/py-tensorflow/0001-Remove-contrib-cloud-bigtable-and-storage-ops-kernel.patch @@ -0,0 +1,76 @@ +--- a/tensorflow/core/BUILD.orig 2020-01-22 18:43:57.000000000 -0500 ++++ b/tensorflow/core/BUILD 2020-03-26 16:33:17.318229701 -0400 +@@ -107,8 +107,6 @@ + load( + "//tensorflow/core/platform:default/build_config.bzl", + "tf_additional_all_protos", +- "tf_additional_cloud_kernel_deps", +- "tf_additional_cloud_op_deps", + "tf_additional_core_deps", + "tf_additional_cupti_wrapper_deps", + "tf_additional_device_tracer_cuda_deps", +@@ -1427,7 +1425,7 @@ + ]) + if_tensorrt([ + "//tensorflow/compiler/tf2tensorrt:trt_engine_resource_ops_op_lib", + "//tensorflow/compiler/tf2tensorrt:trt_op_libs", +- ]) + tf_additional_cloud_op_deps(), ++ ]), + alwayslink = 1, + ) + +@@ -1590,7 +1588,7 @@ + "//tensorflow/core/kernels:summary_kernels", + "//tensorflow/core/kernels:training_ops", + "//tensorflow/core/kernels:word2vec_kernels", +- ] + tf_additional_cloud_kernel_deps() + if_not_windows([ ++ ] + if_not_windows([ + "//tensorflow/core/kernels:fact_op", + "//tensorflow/core/kernels:array_not_windows", + "//tensorflow/core/kernels:math_not_windows", + +diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl +index b822effa14e5..61a19bdf128f 100644 +--- a/tensorflow/core/platform/default/build_config.bzl ++++ b/tensorflow/core/platform/default/build_config.bzl +@@ -682,38 +682,6 @@ def tf_additional_core_deps(): + ], + }) + +-# TODO(jart, jhseu): Delete when GCP is default on. +-def tf_additional_cloud_op_deps(): +- return select({ +- "//tensorflow:android": [], +- "//tensorflow:ios": [], +- "//tensorflow:linux_s390x": [], +- "//tensorflow:windows": [], +- "//tensorflow:api_version_2": [], +- "//tensorflow:windows_and_api_version_2": [], +- "//tensorflow:no_gcp_support": [], +- "//conditions:default": [ +- "//tensorflow/contrib/cloud:bigquery_reader_ops_op_lib", +- "//tensorflow/contrib/cloud:gcs_config_ops_op_lib", +- ], +- }) +- +-# TODO(jhseu): Delete when GCP is default on. +-def tf_additional_cloud_kernel_deps(): +- return select({ +- "//tensorflow:android": [], +- "//tensorflow:ios": [], +- "//tensorflow:linux_s390x": [], +- "//tensorflow:windows": [], +- "//tensorflow:api_version_2": [], +- "//tensorflow:windows_and_api_version_2": [], +- "//tensorflow:no_gcp_support": [], +- "//conditions:default": [ +- "//tensorflow/contrib/cloud/kernels:bigquery_reader_ops", +- "//tensorflow/contrib/cloud/kernels:gcs_config_ops", +- ], +- }) +- + def tf_lib_proto_parsing_deps(): + return [ + ":protos_all_cc", +-- +2.19.1 + diff --git a/var/spack/repos/builtin/packages/py-tensorflow/package.py b/var/spack/repos/builtin/packages/py-tensorflow/package.py index d493c32283..0444a2e09a 100644 --- a/var/spack/repos/builtin/packages/py-tensorflow/package.py +++ b/var/spack/repos/builtin/packages/py-tensorflow/package.py @@ -3,8 +3,7 @@ # # SPDX-License-Identifier: (Apache-2.0 OR MIT) -import glob -import os +from glob import glob import sys @@ -241,6 +240,11 @@ class PyTensorflow(Package, CudaPackage): patch('io_bazel_rules_docker2.patch', when='@1.15:2.0') # Avoide build error: "name 'new_http_archive' is not defined" patch('http_archive.patch', when='@1.12.3') + # Backport of 837c8b6b upstream + # "Remove contrib cloud bigtable and storage ops/kernels." + # Allows 2.0.* releases to build with '--config=nogcp' + patch('0001-Remove-contrib-cloud-bigtable-and-storage-ops-kernel.patch', + when='@2.0.0:2.0.1') phases = ['configure', 'build', 'install'] @@ -581,6 +585,12 @@ class PyTensorflow(Package, CudaPackage): spec['nccl'].prefix.include + '"', '.tf_configure.bazelrc') + # see tensorflow issue #31187 on github + if spec.satisfies('@2.0.0:2.0.1'): + filter_file(r'\#define RUY_DONOTUSEDIRECTLY_AVX512 1', + '#define RUY_DONOTUSEDIRECTLY_AVX512 0', + 'tensorflow/lite/experimental/ruy/platform.h') + if spec.satisfies('+cuda'): libs = spec['cuda'].libs.directories libs.extend(spec['cudnn'].libs.directories) @@ -679,30 +689,24 @@ class PyTensorflow(Package, CudaPackage): build_pip_package = Executable( 'bazel-bin/tensorflow/tools/pip_package/build_pip_package') - build_pip_package(tmp_path) + buildpath = join_path(self.stage.source_path, 'spack-build') + build_pip_package('--src', buildpath) def install(self, spec, prefix): - with working_dir('spack-build', create=True): - for fn in glob.iglob(join_path( - '../bazel-bin/tensorflow/tools/pip_package', - 'build_pip_package.runfiles/org_tensorflow/*')): - dst = os.path.basename(fn) - if not os.path.exists(dst): - os.symlink(fn, dst) - for fn in glob.iglob('../tensorflow/tools/pip_package/*'): - dst = os.path.basename(fn) - if not os.path.exists(dst): - os.symlink(fn, dst) - - # macOS is case-insensitive, and BUILD file in directory - # containing setup.py causes the following error message: - # error: could not create 'build': File exists - # Delete BUILD file to prevent this. - os.remove('BUILD') + buildpath = join_path(self.stage.source_path, 'spack-build') + with working_dir(buildpath): setup_py('install', '--prefix={0}'.format(prefix), '--single-version-externally-managed', '--root=/') + site_packages_dir = join_path( + prefix.lib, + ('python' + str(self.spec['python'].version.up_to(2))), + 'site-packages') + fn = glob(join_path(site_packages_dir, "tensorflow-*")) + incpath = join_path(fn[0], "tensorflow/include") + setup_py('install_headers', '--install-dir={0}'.format(incpath)) + @run_after('install') @on_package_attributes(run_tests=True) def import_module_test(self): -- cgit v1.2.3-60-g2f50