From c141e99e062ba7fa5a140ebf4ca5d1abbe40987f Mon Sep 17 00:00:00 2001
From: Tamara Dahlgren <35777542+tldahlgren@users.noreply.github.com>
Date: Tue, 3 Sep 2019 16:31:27 -0700
Subject: Use spack/user-specific stage root by default; stage cleaning
 (#12516)

* When cleaning the stage root, only remove directories that appear
  to be used for staging Spack packages. Previously Spack was clearing
  all directories in the stage root, which could remove content not
  related to Spack if the user chose a staging root which contains
  files/directories not managed by Spack.
* The documentation is updated with warnings about choosing a stage
  directory that is only managed by Spack (although generally the
  check added in this PR for "spack clean" should avoid removing
  content that was not created by Spack)
* The default stage directory (in config.yaml) is now
  $tempdir/$user/spack-stage and the logic is updated to omit the
  $user portion of this path if $tempdir already contains a $user
  directory.
* When creating stage root assign user read/write permissions to all
  directories in the path under $user. Previously Spack was assigning
  the permissions of the first existing parent directory
---
 lib/spack/docs/config_yaml.rst            | 18 ++++---
 lib/spack/docs/configuration.rst          | 36 ++++++-------
 lib/spack/docs/packaging_guide.rst        |  3 +-
 lib/spack/docs/tutorial_configuration.rst | 12 ++++-
 lib/spack/spack/stage.py                  | 85 +++++++++++++++++++++++--------
 lib/spack/spack/test/data/config.yaml     |  5 +-
 lib/spack/spack/test/stage.py             | 79 +++++++++++++++++++---------
 7 files changed, 166 insertions(+), 72 deletions(-)

(limited to 'lib')

diff --git a/lib/spack/docs/config_yaml.rst b/lib/spack/docs/config_yaml.rst
index ef5d7b65be..92030b8ac9 100644
--- a/lib/spack/docs/config_yaml.rst
+++ b/lib/spack/docs/config_yaml.rst
@@ -86,22 +86,28 @@ Spack is designed to run out of a user home directory, and on many
 systems the home directory is a (slow) network file system.  On most systems,
 building in a temporary file system is faster.  Usually, there is also more
 space available in the temporary location than in the home directory.  If the
-username is not already in the path, Spack will also append the value of
-`$user` to the path.
+username is not already in the path, Spack will append the value of ``$user`` to
+the selected ``build_stage`` path.
 
-.. warning:: We highly recommend appending `spack-stage` to `$tempdir` in order
-   to ensure `spack clean` does not delete everything in `$tempdir`.
+.. warning:: We highly recommend specifying ``build_stage`` paths that
+   distinguish between staging and other activities to ensure 
+   ``spack clean`` does not inadvertently remove unrelated files.
+   This can be accomplished by using a combination of ``spack`` and or
+   ``stage`` in each path as shown in the default settings and documented
+   examples.
 
 By default, Spack's ``build_stage`` is configured like this:
 
 .. code-block:: yaml
 
    build_stage:
-    - $tempdir/spack-stage
+    - $tempdir/$user/spack-stage
+    - ~/.spack/stage
 
 This can be an ordered list of paths that Spack should search when trying to
 find a temporary directory for the build stage.  The list is searched in
 order, and Spack will use the first directory to which it has write access.
+
 Specifying `~/.spack/stage` first will ensure each user builds in their home
 directory.  The historic Spack stage path `$spack/var/spack/stage` will build
 directly inside the Spack instance.  See :ref:`config-file-variables` for more
@@ -116,7 +122,7 @@ deleted, but you can manually purge them with :ref:`spack clean --stage
 .. note::
 
    The build will fail if there is no writable directory in the ``build_stage``
-   list.
+   list, where any user- and site-specific setting will be searched first.
 
 --------------------
 ``source_cache``
diff --git a/lib/spack/docs/configuration.rst b/lib/spack/docs/configuration.rst
index 4ce08d067e..c89a90f2e9 100644
--- a/lib/spack/docs/configuration.rst
+++ b/lib/spack/docs/configuration.rst
@@ -36,8 +36,8 @@ Here is an example ``config.yaml`` file:
      module_roots:
        lmod: $spack/share/spack/lmod
      build_stage:
-       - $tempdir
-       - /nfs/tmp2/$user
+       - $tempdir/$user/spack-stage
+       - ~/.spack/stage
 
 Each Spack configuration file is nested under a top-level section
 corresponding to its name. So, ``config.yaml`` starts with ``config:``,
@@ -244,8 +244,8 @@ your configurations look like this:
      module_roots:
        lmod: $spack/share/spack/lmod
      build_stage:
-       - $tempdir
-       - /nfs/tmp2/$user
+       - $tempdir/$user/spack-stage
+       - ~/.spack/stage
 
 
 .. code-block:: yaml
@@ -269,8 +269,8 @@ command:
      module_roots:
        lmod: $spack/share/spack/lmod
      build_stage:
-       - $tempdir
-       - /nfs/tmp2/$user
+       - $tempdir/$user/spack-stage
+       - ~/.spack/stage
 
 
 .. _config-overrides:
@@ -312,8 +312,8 @@ Let's revisit the ``config.yaml`` example one more time. The
    :caption: $(prefix)/etc/spack/defaults/config.yaml
 
    build_stage:
-     - $tempdir
-     - /nfs/tmp2/$user
+     - $tempdir/$user/spack-stage
+     - ~/.spack/stage
 
 
 Suppose the user configuration adds its *own* list of ``build_stage``
@@ -323,7 +323,7 @@ paths:
    :caption: ~/.spack/config.yaml
 
    build_stage:
-     - /lustre-scratch/$user
+     - /lustre-scratch/$user/spack
      - ~/mystage
 
 
@@ -341,10 +341,10 @@ get config`` shows the result:
      module_roots:
        lmod: $spack/share/spack/lmod
      build_stage:
-       - /lustre-scratch/$user
+       - /lustre-scratch/$user/spack
        - ~/mystage
-       - $tempdir
-       - /nfs/tmp2/$user
+       - $tempdir/$user/spack-stage
+       - ~/.spack/stage
 
 
 As in :ref:`config-overrides`, the higher-precedence scope can
@@ -356,7 +356,7 @@ user config looked like this:
    :caption: ~/.spack/config.yaml
 
    build_stage::
-     - /lustre-scratch/$user
+     - /lustre-scratch/$user/spack
      - ~/mystage
 
 
@@ -371,7 +371,7 @@ The merged configuration would look like this:
      module_roots:
        lmod: $spack/share/spack/lmod
      build_stage:
-       - /lustre-scratch/$user
+       - /lustre-scratch/$user/spack
        - ~/mystage
 
 
@@ -465,8 +465,8 @@ account all scopes. For example, to see the fully merged
        lmod: $spack/share/spack/lmod
        dotkit: $spack/share/spack/dotkit
      build_stage:
-     - $tempdir
-     - /nfs/tmp2/$user
+     - $tempdir/$user/spack-stage
+     - ~/.spack/stage
      - $spack/var/spack/stage
      source_cache: $spack/var/spack/cache
      misc_cache: ~/.spack/cache
@@ -516,8 +516,8 @@ down the problem:
    /home/myuser/spack/etc/spack/defaults/config.yaml:34      lmod: $spack/share/spack/lmod
    /home/myuser/spack/etc/spack/defaults/config.yaml:35      dotkit: $spack/share/spack/dotkit
    /home/myuser/spack/etc/spack/defaults/config.yaml:49    build_stage:
-   /home/myuser/spack/etc/spack/defaults/config.yaml:50    - $tempdir
-   /home/myuser/spack/etc/spack/defaults/config.yaml:51    - /nfs/tmp2/$user
+   /home/myuser/spack/etc/spack/defaults/config.yaml:50    - $tempdir/$user/spack-stage
+   /home/myuser/spack/etc/spack/defaults/config.yaml:51    - ~/.spack/stage
    /home/myuser/spack/etc/spack/defaults/config.yaml:52    - $spack/var/spack/stage
    /home/myuser/spack/etc/spack/defaults/config.yaml:57    source_cache: $spack/var/spack/cache
    /home/myuser/spack/etc/spack/defaults/config.yaml:62    misc_cache: ~/.spack/cache
diff --git a/lib/spack/docs/packaging_guide.rst b/lib/spack/docs/packaging_guide.rst
index 6c3d7c0602..4b062c8a1a 100644
--- a/lib/spack/docs/packaging_guide.rst
+++ b/lib/spack/docs/packaging_guide.rst
@@ -3915,7 +3915,8 @@ The first step of ``spack install``.  Takes a spec and determines the
 correct download URL to use for the requested package version, then
 downloads the archive, checks it against an MD5 checksum, and stores
 it in a staging directory if the check was successful.  The staging
-directory will be located under ``$SPACK_HOME/var/spack``.
+directory will be located under the first writable directory in the
+``build_stage`` configuration setting.
 
 When run after the archive has already been downloaded, ``spack
 fetch`` is idempotent and will not download the archive again.
diff --git a/lib/spack/docs/tutorial_configuration.rst b/lib/spack/docs/tutorial_configuration.rst
index 6b97687cb9..565a6c02b5 100644
--- a/lib/spack/docs/tutorial_configuration.rst
+++ b/lib/spack/docs/tutorial_configuration.rst
@@ -849,7 +849,17 @@ from this file system with the following ``config.yaml``:
 
    config:
      build_stage:
-       - /scratch/$user
+       - /scratch/$user/spack-stage
+
+
+.. note::
+
+   It is important to distinguish the build stage directory from other
+   directories in your scratch space to ensure ``spack clean`` does not
+   inadvertently remove unrelated files.  This can be accomplished by
+   including a combination of ``spack`` and or ``stage`` in each path
+   as shown in the default settings and documented examples.  See
+   :ref:`config-yaml` for details.
 
 
 On systems with compilers that absolutely *require* environment variables
diff --git a/lib/spack/spack/stage.py b/lib/spack/spack/stage.py
index 56d48fa59d..4e950482cf 100644
--- a/lib/spack/spack/stage.py
+++ b/lib/spack/spack/stage.py
@@ -4,6 +4,7 @@
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 
 import os
+import re
 import stat
 import sys
 import errno
@@ -36,6 +37,39 @@ _source_path_subdir = 'spack-src'
 _stage_prefix = 'spack-stage-'
 
 
+def _create_stage_root(path):
+    """Create the stage root directory and ensure appropriate access perms."""
+    assert path.startswith(os.path.sep) and len(path.strip()) > 1
+
+    curr_dir = os.path.sep
+    parts = path.strip(os.path.sep).split(os.path.sep)
+    for i, part in enumerate(parts):
+        curr_dir = os.path.join(curr_dir, part)
+        if not os.path.exists(curr_dir):
+            rest = os.path.join(os.path.sep, *parts[i + 1:])
+            user_parts = rest.partition(os.path.sep + getpass.getuser())
+            if len(user_parts[0]) > 0:
+                # Ensure access controls of subdirs created above `$user`
+                # inherit from the parent and share the group.
+                stats = os.stat(os.path.dirname(curr_dir))
+                curr_dir = ''.join([curr_dir, user_parts[0]])
+                mkdirp(curr_dir, group=stats.st_gid, mode=stats.st_mode)
+
+            user_subdirs = ''.join(user_parts[1:])
+            if len(user_subdirs) > 0:
+                # Ensure access controls of subdirs from `$user` on down are
+                # restricted to the user.
+                curr_dir = ''.join([curr_dir, user_subdirs])
+                mkdirp(curr_dir, mode=stat.S_IRWXU)
+
+            assert os.getuid() == os.stat(curr_dir).st_uid
+            break
+
+    if not can_access(path):
+        raise OSError(errno.EACCES,
+                      'Cannot access %s: Permission denied' % curr_dir)
+
+
 def _first_accessible_path(paths):
     """Find the first path that is accessible, creating it if necessary."""
     for path in paths:
@@ -45,20 +79,9 @@ def _first_accessible_path(paths):
                 if can_access(path):
                     return path
             else:
-                # The path doesn't exist so create it and adjust permissions
-                # and group as needed (e.g., shared ``$tempdir``).
-                prefix = os.path.sep
-                parts = path.strip(os.path.sep).split(os.path.sep)
-                for part in parts:
-                    prefix = os.path.join(prefix, part)
-                    if not os.path.exists(prefix):
-                        break
-                parent = os.path.dirname(prefix)
-                gid = os.stat(parent).st_gid
-                mkdirp(path, group=gid, default_perms='parents')
-
-                if can_access(path):
-                    return path
+                # Now create the stage root with the proper group/perms.
+                _create_stage_root(path)
+                return path
 
         except OSError as e:
             tty.debug('OSError while checking stage path %s: %s' % (
@@ -67,6 +90,23 @@ def _first_accessible_path(paths):
     return None
 
 
+def _resolve_paths(candidates):
+    """Resolve paths, removing extra $user from $tempdir if needed."""
+    temp_path = sup.canonicalize_path('$tempdir')
+    tmp_has_usr = getpass.getuser() in temp_path.split(os.path.sep)
+
+    paths = []
+    for path in candidates:
+        # First remove the extra `$user` node from a `$tempdir/$user` entry
+        # for hosts that automatically append `$user` to `$tempdir`.
+        if path.startswith(os.path.join('$tempdir', '$user')) and tmp_has_usr:
+            path = os.path.join('$tempdir', path[15:])
+
+        paths.append(sup.canonicalize_path(path))
+
+    return paths
+
+
 # Cached stage path root
 _stage_root = None
 
@@ -79,18 +119,19 @@ def get_stage_root():
         if isinstance(candidates, string_types):
             candidates = [candidates]
 
-        resolved_candidates = [sup.canonicalize_path(x) for x in candidates]
+        resolved_candidates = _resolve_paths(candidates)
         path = _first_accessible_path(resolved_candidates)
         if not path:
             raise StageError("No accessible stage paths in:",
                              ' '.join(resolved_candidates))
 
-        # Ensure that any temp path is unique per user, so users don't
-        # fight over shared temporary space.
+        # Ensure that path is unique per user in an attempt to avoid
+        # conflicts in shared temporary spaces.  Emulate permissions from
+        # `tempfile.mkdtemp`.
         user = getpass.getuser()
         if user not in path:
             path = os.path.join(path, user)
-            mkdirp(path)
+            mkdirp(path, mode=stat.S_IRWXU)
 
         _stage_root = path
 
@@ -636,9 +677,13 @@ def purge():
     """Remove all build directories in the top-level stage path."""
     root = get_stage_root()
     if os.path.isdir(root):
+        # TODO: Figure out a "standard" way to identify the hash length
+        # TODO: Should this support alternate base represenations?
+        dir_expr = re.compile(r'.*-[0-9a-f]{32}$')
         for stage_dir in os.listdir(root):
-            stage_path = os.path.join(root, stage_dir)
-            remove_linked_tree(stage_path)
+            if re.match(dir_expr, stage_dir) or stage_dir == '.lock':
+                stage_path = os.path.join(root, stage_dir)
+                remove_linked_tree(stage_path)
 
 
 class StageError(spack.error.SpackError):
diff --git a/lib/spack/spack/test/data/config.yaml b/lib/spack/spack/test/data/config.yaml
index 501001ff00..2d2c2e356a 100644
--- a/lib/spack/spack/test/data/config.yaml
+++ b/lib/spack/spack/test/data/config.yaml
@@ -5,9 +5,8 @@ config:
   - $spack/lib/spack/spack/test/data/templates
   - $spack/lib/spack/spack/test/data/templates_again
   build_stage:
-  - $tempdir/spack-stage-test
-  - /nfs/tmp2/$user
-  - $spack/var/spack/stage
+  - $tempdir/$user/spack-stage
+  - ~/.spack/stage
   source_cache: $spack/var/spack/cache
   misc_cache: ~/.spack/cache
   verify_ssl: true
diff --git a/lib/spack/spack/test/stage.py b/lib/spack/spack/test/stage.py
index b8b217032a..5e21c48cc4 100644
--- a/lib/spack/spack/test/stage.py
+++ b/lib/spack/spack/test/stage.py
@@ -4,10 +4,10 @@
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 
 """Test that the Stage class works correctly."""
-import grp
 import os
 import collections
 import shutil
+import stat
 import tempfile
 import getpass
 
@@ -406,6 +406,30 @@ def search_fn():
     return _Mock()
 
 
+def check_stage_dir_perms(prefix, path):
+    """Check the stage directory perms to ensure match expectations."""
+    # Ensure the path's subdirectories -- to `$user` -- have their parent's
+    # perms while those from `$user` on are owned and restricted to the
+    # user.
+    status = os.stat(prefix)
+    gid = status.st_gid
+    uid = os.getuid()
+    user = getpass.getuser()
+    parts = path[len(prefix) + 1:].split(os.path.sep)
+    have_user = False
+    for part in parts:
+        if part == user:
+            have_user = True
+        prefix = os.path.join(prefix, part)
+        prefix_status = os.stat(prefix)
+        if not have_user:
+            assert gid == prefix_status.st_gid
+            assert status.st_mode == prefix_status.st_mode
+        else:
+            assert uid == status.st_uid
+            assert status.st_mode & stat.S_IRWXU == stat.S_IRWXU
+
+
 @pytest.mark.usefixtures('mock_packages')
 class TestStage(object):
 
@@ -684,6 +708,7 @@ class TestStage(object):
         path = spack.stage._first_accessible_path(files)
         assert path == name
         assert os.path.isdir(path)
+        check_stage_dir_perms(str(tmpdir), path)
 
         # Ensure an existing path is returned
         spack_subdir = spack_dir.join('existing').ensure(dir=True)
@@ -691,31 +716,33 @@ class TestStage(object):
         path = spack.stage._first_accessible_path([subdir])
         assert path == subdir
 
+        # Ensure a path with a `$user` node has the right permissions
+        # for its subdirectories.
+        user = getpass.getuser()
+        user_dir = spack_dir.join(user, 'has', 'paths')
+        user_path = str(user_dir)
+        path = spack.stage._first_accessible_path([user_path])
+        assert path == user_path
+        check_stage_dir_perms(str(tmpdir), path)
+
         # Cleanup
         shutil.rmtree(str(name))
 
-    def test_first_accessible_perms(self, tmpdir):
-        """Test _first_accessible_path permissions."""
-        name = str(tmpdir.join('first', 'path'))
-        path = spack.stage._first_accessible_path([name])
+    def test_resolve_paths(self):
+        """Test _resolve_paths."""
 
-        # Ensure the non-existent path was created
-        assert path == name
-        assert os.path.isdir(path)
+        assert spack.stage._resolve_paths([]) == []
 
-        # Ensure the non-existent subdirectories have their parent's perms
-        prefix = str(tmpdir)
-        status = os.stat(prefix)
-        group = grp.getgrgid(status.st_gid)[0]
-        parts = path[len(prefix):].split(os.path.sep)
-        for part in parts:
-            prefix = os.path.join(prefix, part)
-            prefix_status = os.stat(prefix)
-            assert group == grp.getgrgid(os.stat(prefix).st_gid)[0]
-            assert status.st_mode == prefix_status.st_mode
+        paths = [os.path.join(os.path.sep, 'a', 'b', 'c')]
+        assert spack.stage._resolve_paths(paths) == paths
 
-        # Cleanup
-        shutil.rmtree(os.path.dirname(name))
+        tmp = '$tempdir'
+        paths = [os.path.join(tmp, 'stage'), os.path.join(tmp, '$user')]
+        can_paths = [canonicalize_path(paths[0]), canonicalize_path(tmp)]
+        user = getpass.getuser()
+        if user not in can_paths[1].split(os.path.sep):
+            can_paths[1] = os.path.join(can_paths[1], user)
+        assert spack.stage._resolve_paths(paths) == can_paths
 
     def test_get_stage_root_bad_path(self, clear_stage_root, bad_stage_path):
         """Ensure an invalid stage path root raises a StageError."""
@@ -756,15 +783,21 @@ class TestStage(object):
         # Make sure the cached stage path values are changed appropriately.
         assert spack.stage._stage_root == path
 
-        # Add then purge a couple of directories
-        dir1 = tmpdir.join('dir1')
+        # Add then purge a few directories
+        dir1 = tmpdir.join('stage-1234567890abcdef1234567890abcdef')
         dir1.ensure(dir=True)
-        dir2 = tmpdir.join('dir2')
+        dir2 = tmpdir.join('stage-abcdef12345678900987654321fedcba')
         dir2.ensure(dir=True)
+        dir3 = tmpdir.join('stage-a1b2c3')
+        dir3.ensure(dir=True)
 
         spack.stage.purge()
         assert not os.path.exists(str(dir1))
         assert not os.path.exists(str(dir2))
+        assert os.path.exists(str(dir3))
+
+        # Cleanup
+        shutil.rmtree(str(dir3))
 
     def test_get_stage_root_in_spack(self, clear_stage_root,
                                      instance_path_for_stage):
-- 
cgit v1.2.3-70-g09d2