From c141e99e062ba7fa5a140ebf4ca5d1abbe40987f Mon Sep 17 00:00:00 2001 From: Tamara Dahlgren <35777542+tldahlgren@users.noreply.github.com> Date: Tue, 3 Sep 2019 16:31:27 -0700 Subject: Use spack/user-specific stage root by default; stage cleaning (#12516) * When cleaning the stage root, only remove directories that appear to be used for staging Spack packages. Previously Spack was clearing all directories in the stage root, which could remove content not related to Spack if the user chose a staging root which contains files/directories not managed by Spack. * The documentation is updated with warnings about choosing a stage directory that is only managed by Spack (although generally the check added in this PR for "spack clean" should avoid removing content that was not created by Spack) * The default stage directory (in config.yaml) is now $tempdir/$user/spack-stage and the logic is updated to omit the $user portion of this path if $tempdir already contains a $user directory. * When creating stage root assign user read/write permissions to all directories in the path under $user. Previously Spack was assigning the permissions of the first existing parent directory --- etc/spack/defaults/config.yaml | 19 ++++--- lib/spack/docs/config_yaml.rst | 18 ++++--- lib/spack/docs/configuration.rst | 36 ++++++------- lib/spack/docs/packaging_guide.rst | 3 +- lib/spack/docs/tutorial_configuration.rst | 12 ++++- lib/spack/spack/stage.py | 85 +++++++++++++++++++++++-------- lib/spack/spack/test/data/config.yaml | 5 +- lib/spack/spack/test/stage.py | 79 +++++++++++++++++++--------- 8 files changed, 178 insertions(+), 79 deletions(-) diff --git a/etc/spack/defaults/config.yaml b/etc/spack/defaults/config.yaml index 33a37bbdfa..1b6d83f359 100644 --- a/etc/spack/defaults/config.yaml +++ b/etc/spack/defaults/config.yaml @@ -40,11 +40,8 @@ config: # Recommended options are given below. # # Builds can be faster in temporary directories on some (e.g., HPC) systems. - # Specifying `$tempdir` will ensure use of the system default temporary - # directory (as returned by `tempfile.gettempdir()`). Spack will append - # `spack-stage` and, if the username is not already in the path, the value - # of `$user` to the path. The latter is used to avoid conflicts between - # users in shared temporary spaces. + # Specifying `$tempdir` will ensure use of the default temporary directory + # (i.e., ``$TMP` or ``$TMPDIR``). # # Another option that prevents conflicts and potential permission issues is # to specify `~/.spack/stage`, which ensures each user builds in their home @@ -52,13 +49,21 @@ config: # # A more traditional path uses the value of `$spack/var/spack/stage`, which # builds directly inside Spack's instance without staging them in a - # temporary space. + # temporary space. Problems with specifying a path inside a Spack instance + # are that it precludes its use as a system package and its ability to be + # pip installable. + # + # In any case, if the username is not already in the path, Spack will append + # the value of `$user` in an attempt to avoid potential conflicts between + # users in shared temporary spaces. # # The build stage can be purged with `spack clean --stage` and # `spack clean -a`, so it is important that the specified directory uniquely # identifies Spack staging to avoid accidentally wiping out non-Spack work. build_stage: - - $tempdir/spack-stage + - $tempdir/$user/spack-stage + - ~/.spack/stage + # - $spack/var/spack/stage # Cache directory for already downloaded source tarballs and archived diff --git a/lib/spack/docs/config_yaml.rst b/lib/spack/docs/config_yaml.rst index ef5d7b65be..92030b8ac9 100644 --- a/lib/spack/docs/config_yaml.rst +++ b/lib/spack/docs/config_yaml.rst @@ -86,22 +86,28 @@ Spack is designed to run out of a user home directory, and on many systems the home directory is a (slow) network file system. On most systems, building in a temporary file system is faster. Usually, there is also more space available in the temporary location than in the home directory. If the -username is not already in the path, Spack will also append the value of -`$user` to the path. +username is not already in the path, Spack will append the value of ``$user`` to +the selected ``build_stage`` path. -.. warning:: We highly recommend appending `spack-stage` to `$tempdir` in order - to ensure `spack clean` does not delete everything in `$tempdir`. +.. warning:: We highly recommend specifying ``build_stage`` paths that + distinguish between staging and other activities to ensure + ``spack clean`` does not inadvertently remove unrelated files. + This can be accomplished by using a combination of ``spack`` and or + ``stage`` in each path as shown in the default settings and documented + examples. By default, Spack's ``build_stage`` is configured like this: .. code-block:: yaml build_stage: - - $tempdir/spack-stage + - $tempdir/$user/spack-stage + - ~/.spack/stage This can be an ordered list of paths that Spack should search when trying to find a temporary directory for the build stage. The list is searched in order, and Spack will use the first directory to which it has write access. + Specifying `~/.spack/stage` first will ensure each user builds in their home directory. The historic Spack stage path `$spack/var/spack/stage` will build directly inside the Spack instance. See :ref:`config-file-variables` for more @@ -116,7 +122,7 @@ deleted, but you can manually purge them with :ref:`spack clean --stage .. note:: The build will fail if there is no writable directory in the ``build_stage`` - list. + list, where any user- and site-specific setting will be searched first. -------------------- ``source_cache`` diff --git a/lib/spack/docs/configuration.rst b/lib/spack/docs/configuration.rst index 4ce08d067e..c89a90f2e9 100644 --- a/lib/spack/docs/configuration.rst +++ b/lib/spack/docs/configuration.rst @@ -36,8 +36,8 @@ Here is an example ``config.yaml`` file: module_roots: lmod: $spack/share/spack/lmod build_stage: - - $tempdir - - /nfs/tmp2/$user + - $tempdir/$user/spack-stage + - ~/.spack/stage Each Spack configuration file is nested under a top-level section corresponding to its name. So, ``config.yaml`` starts with ``config:``, @@ -244,8 +244,8 @@ your configurations look like this: module_roots: lmod: $spack/share/spack/lmod build_stage: - - $tempdir - - /nfs/tmp2/$user + - $tempdir/$user/spack-stage + - ~/.spack/stage .. code-block:: yaml @@ -269,8 +269,8 @@ command: module_roots: lmod: $spack/share/spack/lmod build_stage: - - $tempdir - - /nfs/tmp2/$user + - $tempdir/$user/spack-stage + - ~/.spack/stage .. _config-overrides: @@ -312,8 +312,8 @@ Let's revisit the ``config.yaml`` example one more time. The :caption: $(prefix)/etc/spack/defaults/config.yaml build_stage: - - $tempdir - - /nfs/tmp2/$user + - $tempdir/$user/spack-stage + - ~/.spack/stage Suppose the user configuration adds its *own* list of ``build_stage`` @@ -323,7 +323,7 @@ paths: :caption: ~/.spack/config.yaml build_stage: - - /lustre-scratch/$user + - /lustre-scratch/$user/spack - ~/mystage @@ -341,10 +341,10 @@ get config`` shows the result: module_roots: lmod: $spack/share/spack/lmod build_stage: - - /lustre-scratch/$user + - /lustre-scratch/$user/spack - ~/mystage - - $tempdir - - /nfs/tmp2/$user + - $tempdir/$user/spack-stage + - ~/.spack/stage As in :ref:`config-overrides`, the higher-precedence scope can @@ -356,7 +356,7 @@ user config looked like this: :caption: ~/.spack/config.yaml build_stage:: - - /lustre-scratch/$user + - /lustre-scratch/$user/spack - ~/mystage @@ -371,7 +371,7 @@ The merged configuration would look like this: module_roots: lmod: $spack/share/spack/lmod build_stage: - - /lustre-scratch/$user + - /lustre-scratch/$user/spack - ~/mystage @@ -465,8 +465,8 @@ account all scopes. For example, to see the fully merged lmod: $spack/share/spack/lmod dotkit: $spack/share/spack/dotkit build_stage: - - $tempdir - - /nfs/tmp2/$user + - $tempdir/$user/spack-stage + - ~/.spack/stage - $spack/var/spack/stage source_cache: $spack/var/spack/cache misc_cache: ~/.spack/cache @@ -516,8 +516,8 @@ down the problem: /home/myuser/spack/etc/spack/defaults/config.yaml:34 lmod: $spack/share/spack/lmod /home/myuser/spack/etc/spack/defaults/config.yaml:35 dotkit: $spack/share/spack/dotkit /home/myuser/spack/etc/spack/defaults/config.yaml:49 build_stage: - /home/myuser/spack/etc/spack/defaults/config.yaml:50 - $tempdir - /home/myuser/spack/etc/spack/defaults/config.yaml:51 - /nfs/tmp2/$user + /home/myuser/spack/etc/spack/defaults/config.yaml:50 - $tempdir/$user/spack-stage + /home/myuser/spack/etc/spack/defaults/config.yaml:51 - ~/.spack/stage /home/myuser/spack/etc/spack/defaults/config.yaml:52 - $spack/var/spack/stage /home/myuser/spack/etc/spack/defaults/config.yaml:57 source_cache: $spack/var/spack/cache /home/myuser/spack/etc/spack/defaults/config.yaml:62 misc_cache: ~/.spack/cache diff --git a/lib/spack/docs/packaging_guide.rst b/lib/spack/docs/packaging_guide.rst index 6c3d7c0602..4b062c8a1a 100644 --- a/lib/spack/docs/packaging_guide.rst +++ b/lib/spack/docs/packaging_guide.rst @@ -3915,7 +3915,8 @@ The first step of ``spack install``. Takes a spec and determines the correct download URL to use for the requested package version, then downloads the archive, checks it against an MD5 checksum, and stores it in a staging directory if the check was successful. The staging -directory will be located under ``$SPACK_HOME/var/spack``. +directory will be located under the first writable directory in the +``build_stage`` configuration setting. When run after the archive has already been downloaded, ``spack fetch`` is idempotent and will not download the archive again. diff --git a/lib/spack/docs/tutorial_configuration.rst b/lib/spack/docs/tutorial_configuration.rst index 6b97687cb9..565a6c02b5 100644 --- a/lib/spack/docs/tutorial_configuration.rst +++ b/lib/spack/docs/tutorial_configuration.rst @@ -849,7 +849,17 @@ from this file system with the following ``config.yaml``: config: build_stage: - - /scratch/$user + - /scratch/$user/spack-stage + + +.. note:: + + It is important to distinguish the build stage directory from other + directories in your scratch space to ensure ``spack clean`` does not + inadvertently remove unrelated files. This can be accomplished by + including a combination of ``spack`` and or ``stage`` in each path + as shown in the default settings and documented examples. See + :ref:`config-yaml` for details. On systems with compilers that absolutely *require* environment variables diff --git a/lib/spack/spack/stage.py b/lib/spack/spack/stage.py index 56d48fa59d..4e950482cf 100644 --- a/lib/spack/spack/stage.py +++ b/lib/spack/spack/stage.py @@ -4,6 +4,7 @@ # SPDX-License-Identifier: (Apache-2.0 OR MIT) import os +import re import stat import sys import errno @@ -36,6 +37,39 @@ _source_path_subdir = 'spack-src' _stage_prefix = 'spack-stage-' +def _create_stage_root(path): + """Create the stage root directory and ensure appropriate access perms.""" + assert path.startswith(os.path.sep) and len(path.strip()) > 1 + + curr_dir = os.path.sep + parts = path.strip(os.path.sep).split(os.path.sep) + for i, part in enumerate(parts): + curr_dir = os.path.join(curr_dir, part) + if not os.path.exists(curr_dir): + rest = os.path.join(os.path.sep, *parts[i + 1:]) + user_parts = rest.partition(os.path.sep + getpass.getuser()) + if len(user_parts[0]) > 0: + # Ensure access controls of subdirs created above `$user` + # inherit from the parent and share the group. + stats = os.stat(os.path.dirname(curr_dir)) + curr_dir = ''.join([curr_dir, user_parts[0]]) + mkdirp(curr_dir, group=stats.st_gid, mode=stats.st_mode) + + user_subdirs = ''.join(user_parts[1:]) + if len(user_subdirs) > 0: + # Ensure access controls of subdirs from `$user` on down are + # restricted to the user. + curr_dir = ''.join([curr_dir, user_subdirs]) + mkdirp(curr_dir, mode=stat.S_IRWXU) + + assert os.getuid() == os.stat(curr_dir).st_uid + break + + if not can_access(path): + raise OSError(errno.EACCES, + 'Cannot access %s: Permission denied' % curr_dir) + + def _first_accessible_path(paths): """Find the first path that is accessible, creating it if necessary.""" for path in paths: @@ -45,20 +79,9 @@ def _first_accessible_path(paths): if can_access(path): return path else: - # The path doesn't exist so create it and adjust permissions - # and group as needed (e.g., shared ``$tempdir``). - prefix = os.path.sep - parts = path.strip(os.path.sep).split(os.path.sep) - for part in parts: - prefix = os.path.join(prefix, part) - if not os.path.exists(prefix): - break - parent = os.path.dirname(prefix) - gid = os.stat(parent).st_gid - mkdirp(path, group=gid, default_perms='parents') - - if can_access(path): - return path + # Now create the stage root with the proper group/perms. + _create_stage_root(path) + return path except OSError as e: tty.debug('OSError while checking stage path %s: %s' % ( @@ -67,6 +90,23 @@ def _first_accessible_path(paths): return None +def _resolve_paths(candidates): + """Resolve paths, removing extra $user from $tempdir if needed.""" + temp_path = sup.canonicalize_path('$tempdir') + tmp_has_usr = getpass.getuser() in temp_path.split(os.path.sep) + + paths = [] + for path in candidates: + # First remove the extra `$user` node from a `$tempdir/$user` entry + # for hosts that automatically append `$user` to `$tempdir`. + if path.startswith(os.path.join('$tempdir', '$user')) and tmp_has_usr: + path = os.path.join('$tempdir', path[15:]) + + paths.append(sup.canonicalize_path(path)) + + return paths + + # Cached stage path root _stage_root = None @@ -79,18 +119,19 @@ def get_stage_root(): if isinstance(candidates, string_types): candidates = [candidates] - resolved_candidates = [sup.canonicalize_path(x) for x in candidates] + resolved_candidates = _resolve_paths(candidates) path = _first_accessible_path(resolved_candidates) if not path: raise StageError("No accessible stage paths in:", ' '.join(resolved_candidates)) - # Ensure that any temp path is unique per user, so users don't - # fight over shared temporary space. + # Ensure that path is unique per user in an attempt to avoid + # conflicts in shared temporary spaces. Emulate permissions from + # `tempfile.mkdtemp`. user = getpass.getuser() if user not in path: path = os.path.join(path, user) - mkdirp(path) + mkdirp(path, mode=stat.S_IRWXU) _stage_root = path @@ -636,9 +677,13 @@ def purge(): """Remove all build directories in the top-level stage path.""" root = get_stage_root() if os.path.isdir(root): + # TODO: Figure out a "standard" way to identify the hash length + # TODO: Should this support alternate base represenations? + dir_expr = re.compile(r'.*-[0-9a-f]{32}$') for stage_dir in os.listdir(root): - stage_path = os.path.join(root, stage_dir) - remove_linked_tree(stage_path) + if re.match(dir_expr, stage_dir) or stage_dir == '.lock': + stage_path = os.path.join(root, stage_dir) + remove_linked_tree(stage_path) class StageError(spack.error.SpackError): diff --git a/lib/spack/spack/test/data/config.yaml b/lib/spack/spack/test/data/config.yaml index 501001ff00..2d2c2e356a 100644 --- a/lib/spack/spack/test/data/config.yaml +++ b/lib/spack/spack/test/data/config.yaml @@ -5,9 +5,8 @@ config: - $spack/lib/spack/spack/test/data/templates - $spack/lib/spack/spack/test/data/templates_again build_stage: - - $tempdir/spack-stage-test - - /nfs/tmp2/$user - - $spack/var/spack/stage + - $tempdir/$user/spack-stage + - ~/.spack/stage source_cache: $spack/var/spack/cache misc_cache: ~/.spack/cache verify_ssl: true diff --git a/lib/spack/spack/test/stage.py b/lib/spack/spack/test/stage.py index b8b217032a..5e21c48cc4 100644 --- a/lib/spack/spack/test/stage.py +++ b/lib/spack/spack/test/stage.py @@ -4,10 +4,10 @@ # SPDX-License-Identifier: (Apache-2.0 OR MIT) """Test that the Stage class works correctly.""" -import grp import os import collections import shutil +import stat import tempfile import getpass @@ -406,6 +406,30 @@ def search_fn(): return _Mock() +def check_stage_dir_perms(prefix, path): + """Check the stage directory perms to ensure match expectations.""" + # Ensure the path's subdirectories -- to `$user` -- have their parent's + # perms while those from `$user` on are owned and restricted to the + # user. + status = os.stat(prefix) + gid = status.st_gid + uid = os.getuid() + user = getpass.getuser() + parts = path[len(prefix) + 1:].split(os.path.sep) + have_user = False + for part in parts: + if part == user: + have_user = True + prefix = os.path.join(prefix, part) + prefix_status = os.stat(prefix) + if not have_user: + assert gid == prefix_status.st_gid + assert status.st_mode == prefix_status.st_mode + else: + assert uid == status.st_uid + assert status.st_mode & stat.S_IRWXU == stat.S_IRWXU + + @pytest.mark.usefixtures('mock_packages') class TestStage(object): @@ -684,6 +708,7 @@ class TestStage(object): path = spack.stage._first_accessible_path(files) assert path == name assert os.path.isdir(path) + check_stage_dir_perms(str(tmpdir), path) # Ensure an existing path is returned spack_subdir = spack_dir.join('existing').ensure(dir=True) @@ -691,31 +716,33 @@ class TestStage(object): path = spack.stage._first_accessible_path([subdir]) assert path == subdir + # Ensure a path with a `$user` node has the right permissions + # for its subdirectories. + user = getpass.getuser() + user_dir = spack_dir.join(user, 'has', 'paths') + user_path = str(user_dir) + path = spack.stage._first_accessible_path([user_path]) + assert path == user_path + check_stage_dir_perms(str(tmpdir), path) + # Cleanup shutil.rmtree(str(name)) - def test_first_accessible_perms(self, tmpdir): - """Test _first_accessible_path permissions.""" - name = str(tmpdir.join('first', 'path')) - path = spack.stage._first_accessible_path([name]) + def test_resolve_paths(self): + """Test _resolve_paths.""" - # Ensure the non-existent path was created - assert path == name - assert os.path.isdir(path) + assert spack.stage._resolve_paths([]) == [] - # Ensure the non-existent subdirectories have their parent's perms - prefix = str(tmpdir) - status = os.stat(prefix) - group = grp.getgrgid(status.st_gid)[0] - parts = path[len(prefix):].split(os.path.sep) - for part in parts: - prefix = os.path.join(prefix, part) - prefix_status = os.stat(prefix) - assert group == grp.getgrgid(os.stat(prefix).st_gid)[0] - assert status.st_mode == prefix_status.st_mode + paths = [os.path.join(os.path.sep, 'a', 'b', 'c')] + assert spack.stage._resolve_paths(paths) == paths - # Cleanup - shutil.rmtree(os.path.dirname(name)) + tmp = '$tempdir' + paths = [os.path.join(tmp, 'stage'), os.path.join(tmp, '$user')] + can_paths = [canonicalize_path(paths[0]), canonicalize_path(tmp)] + user = getpass.getuser() + if user not in can_paths[1].split(os.path.sep): + can_paths[1] = os.path.join(can_paths[1], user) + assert spack.stage._resolve_paths(paths) == can_paths def test_get_stage_root_bad_path(self, clear_stage_root, bad_stage_path): """Ensure an invalid stage path root raises a StageError.""" @@ -756,15 +783,21 @@ class TestStage(object): # Make sure the cached stage path values are changed appropriately. assert spack.stage._stage_root == path - # Add then purge a couple of directories - dir1 = tmpdir.join('dir1') + # Add then purge a few directories + dir1 = tmpdir.join('stage-1234567890abcdef1234567890abcdef') dir1.ensure(dir=True) - dir2 = tmpdir.join('dir2') + dir2 = tmpdir.join('stage-abcdef12345678900987654321fedcba') dir2.ensure(dir=True) + dir3 = tmpdir.join('stage-a1b2c3') + dir3.ensure(dir=True) spack.stage.purge() assert not os.path.exists(str(dir1)) assert not os.path.exists(str(dir2)) + assert os.path.exists(str(dir3)) + + # Cleanup + shutil.rmtree(str(dir3)) def test_get_stage_root_in_spack(self, clear_stage_root, instance_path_for_stage): -- cgit v1.2.3-70-g09d2