From 2fe1ecbaa2f87bc3f11f73ee41828ea5b9a6b3d5 Mon Sep 17 00:00:00 2001
From: Massimiliano Culpo <massimiliano.culpo@gmail.com>
Date: Tue, 16 Jul 2019 17:54:33 +0200
Subject: Ignore Modules v4 environment variables in `from_sourcing_file`
 (#10753)

* from_sourcing_file: fixed a bug + added a few ignored variables

closes #7536

Credits for this change goes to mgsternberg (original author of #7536)

The new variables being ignored are specific to Modules v4.

* Use Spack Executable in 'EnvironmentModifications.from_sourcing_file'

Using this class avoids duplicating lower level logic to decode
stdout and handle non-zero return codes

* Extracted a function that returns the environment after sourcing files

The logic in `EnvironmentModifications.from_sourcing_file` has been
simplified by extracting a function that returns a dictionary with the
environment one would have after sourcing the files passed as argument.

* Further refactoring of EnvironmentModifications.from_sourcing_file

Extracted a function that sanitizes a dictionary removing keys that are
blacklisted, but keeping those that are whitelisted. Blacklisting and
whitelisting can be done on literals or regex.

Extracted a new factory that creates an instance of
EnvironmentModifications from a diff of two environments.

* Added unit tests

* PS1 is blacklisted + more readable names for some variables
---
 lib/spack/spack/test/data/sourceme_unset.sh       |   8 +
 lib/spack/spack/test/environment_modifications.py | 129 +++++++++-
 lib/spack/spack/util/environment.py               | 291 ++++++++++++++--------
 3 files changed, 311 insertions(+), 117 deletions(-)
 create mode 100644 lib/spack/spack/test/data/sourceme_unset.sh

diff --git a/lib/spack/spack/test/data/sourceme_unset.sh b/lib/spack/spack/test/data/sourceme_unset.sh
new file mode 100644
index 0000000000..e14aeb3f89
--- /dev/null
+++ b/lib/spack/spack/test/data/sourceme_unset.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+#
+# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
+# Spack Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+unset UNSET_ME
diff --git a/lib/spack/spack/test/environment_modifications.py b/lib/spack/spack/test/environment_modifications.py
index 664a57a92a..b8bc7d3c34 100644
--- a/lib/spack/spack/test/environment_modifications.py
+++ b/lib/spack/spack/test/environment_modifications.py
@@ -14,6 +14,9 @@ from spack.util.environment import SetEnv, UnsetEnv
 from spack.util.environment import filter_system_paths, is_system_path
 
 
+datadir = os.path.join(spack_root, 'lib', 'spack', 'spack', 'test', 'data')
+
+
 def test_inspect_path(tmpdir):
     inspections = {
         'bin': ['PATH'],
@@ -55,7 +58,7 @@ def test_exclude_paths_from_inspection():
 
 
 @pytest.fixture()
-def prepare_environment_for_tests():
+def prepare_environment_for_tests(working_env):
     """Sets a few dummy variables in the current environment, that will be
     useful for the tests below.
     """
@@ -67,11 +70,6 @@ def prepare_environment_for_tests():
     os.environ['PATH_LIST_WITH_SYSTEM_PATHS'] \
         = '/usr/include:' + os.environ['REMOVE_PATH_LIST']
     os.environ['PATH_LIST_WITH_DUPLICATES'] = os.environ['REMOVE_PATH_LIST']
-    yield
-    for x in ('UNSET_ME', 'EMPTY_PATH_LIST', 'PATH_LIST', 'REMOVE_PATH_LIST',
-              'PATH_LIST_WITH_SYSTEM_PATHS', 'PATH_LIST_WITH_DUPLICATES'):
-        if x in os.environ:
-            del os.environ[x]
 
 
 @pytest.fixture
@@ -109,19 +107,13 @@ def miscellaneous_paths():
 @pytest.fixture
 def files_to_be_sourced():
     """Returns a list of files to be sourced"""
-    datadir = os.path.join(
-        spack_root, 'lib', 'spack', 'spack', 'test', 'data'
-    )
-
-    files = [
+    return [
         os.path.join(datadir, 'sourceme_first.sh'),
         os.path.join(datadir, 'sourceme_second.sh'),
         os.path.join(datadir, 'sourceme_parameters.sh'),
         os.path.join(datadir, 'sourceme_unicode.sh')
     ]
 
-    return files
-
 
 def test_set(env):
     """Tests setting values in the environment."""
@@ -322,8 +314,119 @@ def test_preserve_environment(prepare_environment_for_tests):
     assert os.environ['PATH_LIST'] == '/path/second:/path/third'
 
 
+@pytest.mark.parametrize('files,expected,deleted', [
+    # Sets two variables
+    ((os.path.join(datadir, 'sourceme_first.sh'),),
+     {'NEW_VAR': 'new', 'UNSET_ME': 'overridden'}, []),
+    # Check if we can set a variable to different values depending
+    # on command line parameters
+    ((os.path.join(datadir, 'sourceme_parameters.sh'),),
+     {'FOO': 'default'}, []),
+    (([os.path.join(datadir, 'sourceme_parameters.sh'), 'intel64'],),
+     {'FOO': 'intel64'}, []),
+    # Check unsetting variables
+    ((os.path.join(datadir, 'sourceme_second.sh'),),
+     {'PATH_LIST': '/path/first:/path/second:/path/fourth'},
+     ['EMPTY_PATH_LIST']),
+    # Check that order of sourcing matters
+    ((os.path.join(datadir, 'sourceme_unset.sh'),
+      os.path.join(datadir, 'sourceme_first.sh')),
+     {'NEW_VAR': 'new', 'UNSET_ME': 'overridden'}, []),
+    ((os.path.join(datadir, 'sourceme_first.sh'),
+      os.path.join(datadir, 'sourceme_unset.sh')),
+     {'NEW_VAR': 'new'}, ['UNSET_ME']),
+
+])
+@pytest.mark.usefixtures('prepare_environment_for_tests')
+def test_environment_from_sourcing_files(files, expected, deleted):
+
+    env = environment.environment_after_sourcing_files(*files)
+
+    # Test that variables that have been modified are still there and contain
+    # the expected output
+    for name, value in expected.items():
+        assert name in env
+        assert value in env[name]
+
+    # Test that variables that have been unset are not there
+    for name in deleted:
+        assert name not in env
+
+
 def test_clear(env):
     env.set('A', 'dummy value')
     assert len(env) > 0
     env.clear()
     assert len(env) == 0
+
+
+@pytest.mark.parametrize('env,blacklist,whitelist', [
+    # Check we can blacklist a literal
+    ({'SHLVL': '1'}, ['SHLVL'], []),
+    # Check whitelist takes precedence
+    ({'SHLVL': '1'}, ['SHLVL'], ['SHLVL']),
+])
+def test_sanitize_literals(env, blacklist, whitelist):
+
+    after = environment.sanitize(env, blacklist, whitelist)
+
+    # Check that all the whitelisted variables are there
+    assert all(x in after for x in whitelist)
+
+    # Check that the blacklisted variables that are not
+    # whitelisted are there
+    blacklist = list(set(blacklist) - set(whitelist))
+    assert all(x not in after for x in blacklist)
+
+
+@pytest.mark.parametrize('env,blacklist,whitelist,expected,deleted', [
+    # Check we can blacklist using a regex
+    ({'SHLVL': '1'}, ['SH.*'], [], [], ['SHLVL']),
+    # Check we can whitelist using a regex
+    ({'SHLVL': '1'}, ['SH.*'], ['SH.*'], ['SHLVL'], []),
+    # Check regex to blacklist Modules v4 related vars
+    ({'MODULES_LMALTNAME': '1', 'MODULES_LMCONFLICT': '2'},
+     ['MODULES_(.*)'], [], [], ['MODULES_LMALTNAME', 'MODULES_LMCONFLICT']),
+    ({'A_modquar': '1', 'b_modquar': '2', 'C_modshare': '3'},
+     [r'(\w*)_mod(quar|share)'], [], [],
+     ['A_modquar', 'b_modquar', 'C_modshare']),
+])
+def test_sanitize_regex(env, blacklist, whitelist, expected, deleted):
+
+    after = environment.sanitize(env, blacklist, whitelist)
+
+    assert all(x in after for x in expected)
+    assert all(x not in after for x in deleted)
+
+
+@pytest.mark.parametrize('before,after,search_list', [
+    # Set environment variables
+    ({}, {'FOO': 'foo'}, [environment.SetEnv('FOO', 'foo')]),
+    # Unset environment variables
+    ({'FOO': 'foo'}, {}, [environment.UnsetEnv('FOO')]),
+    # Append paths to an environment variable
+    ({'FOO_PATH': '/a/path'}, {'FOO_PATH': '/a/path:/b/path'},
+     [environment.AppendPath('FOO_PATH', '/b/path')]),
+    ({}, {'FOO_PATH': '/a/path:/b/path'}, [
+        environment.AppendPath('FOO_PATH', '/a/path:/b/path')
+    ]),
+    ({'FOO_PATH': '/a/path:/b/path'}, {'FOO_PATH': '/b/path'}, [
+        environment.RemovePath('FOO_PATH', '/a/path')
+    ]),
+    ({'FOO_PATH': '/a/path:/b/path'}, {'FOO_PATH': '/a/path:/c/path'}, [
+        environment.RemovePath('FOO_PATH', '/b/path'),
+        environment.AppendPath('FOO_PATH', '/c/path')
+    ]),
+    ({'FOO_PATH': '/a/path:/b/path'}, {'FOO_PATH': '/c/path:/a/path'}, [
+        environment.RemovePath('FOO_PATH', '/b/path'),
+        environment.PrependPath('FOO_PATH', '/c/path')
+    ])
+])
+def test_from_environment_diff(before, after, search_list):
+
+    mod = environment.EnvironmentModifications.from_environment_diff(
+        before, after
+    )
+
+    for item in search_list:
+        assert item in mod
diff --git a/lib/spack/spack/util/environment.py b/lib/spack/spack/util/environment.py
index ef2d2a23b6..7ad80a7eab 100644
--- a/lib/spack/spack/util/environment.py
+++ b/lib/spack/spack/util/environment.py
@@ -12,9 +12,11 @@ import os
 import re
 import sys
 import os.path
-import subprocess
+
+import six
 
 import llnl.util.tty as tty
+import spack.util.executable as executable
 
 from llnl.util.lang import dedupe
 
@@ -171,6 +173,11 @@ class NameModifier(object):
 
         self.args.update(kwargs)
 
+    def __eq__(self, other):
+        if not isinstance(other, NameModifier):
+            return False
+        return self.name == other.name
+
     def update_args(self, **kwargs):
         self.__dict__.update(kwargs)
         self.args.update(kwargs)
@@ -185,6 +192,13 @@ class NameValueModifier(object):
         self.args = {'name': name, 'value': value, 'separator': self.separator}
         self.args.update(kwargs)
 
+    def __eq__(self, other):
+        if not isinstance(other, NameValueModifier):
+            return False
+        return self.name == other.name and \
+            self.value == other.value and \
+            self.separator == other.separator
+
     def update_args(self, **kwargs):
         self.__dict__.update(kwargs)
         self.args.update(kwargs)
@@ -483,122 +497,86 @@ class EnvironmentModifications(object):
         return cmds
 
     @staticmethod
-    def from_sourcing_file(filename, *args, **kwargs):
-        """Returns modifications that would be made by sourcing a file.
-
-        Parameters:
-            filename (str): The file to source
-            *args (list of str): Arguments to pass on the command line
-
-        Keyword Arguments:
-            shell (str): The shell to use (default: ``bash``)
-            shell_options (str): Options passed to the shell (default: ``-c``)
-            source_command (str): The command to run (default: ``source``)
-            suppress_output (str): Redirect used to suppress output of command
+    def from_sourcing_file(filename, *arguments, **kwargs):
+        """Constructs an instance of a
+        :py:class:`spack.util.environment.EnvironmentModifications` object
+        that has the same effect as sourcing a file.
+
+        Args:
+            filename (str): the file to be sourced
+            *arguments (list of str): arguments to pass on the command line
+
+        Keyword Args:
+            shell (str): the shell to use (default: ``bash``)
+            shell_options (str): options passed to the shell (default: ``-c``)
+            source_command (str): the command to run (default: ``source``)
+            suppress_output (str): redirect used to suppress output of command
                 (default: ``&> /dev/null``)
-            concatenate_on_success (str): Operator used to execute a command
+            concatenate_on_success (str): operator used to execute a command
                 only when the previous command succeeds (default: ``&&``)
-            blacklist ([str or re]): Ignore any modifications of these
+            blacklist ([str or re]): ignore any modifications of these
                 variables (default: [])
-            whitelist ([str or re]): Always respect modifications of these
-                variables (default: []). Has precedence over blacklist.
-            clean (bool): In addition to removing empty entries,
+            whitelist ([str or re]): always respect modifications of these
+                variables (default: []). has precedence over blacklist.
+            clean (bool): in addition to removing empty entries,
                 also remove duplicate entries (default: False).
-
-        Returns:
-            EnvironmentModifications: an object that, if executed, has
-                the same effect on the environment as sourcing the file
         """
         # Check if the file actually exists
         if not os.path.isfile(filename):
             msg = 'Trying to source non-existing file: {0}'.format(filename)
             raise RuntimeError(msg)
 
-        # Kwargs parsing and default values
-        shell                  = kwargs.get('shell', '/bin/bash')
-        shell_options          = kwargs.get('shell_options', '-c')
-        source_command         = kwargs.get('source_command', 'source')
-        suppress_output        = kwargs.get('suppress_output', '&> /dev/null')
-        concatenate_on_success = kwargs.get('concatenate_on_success', '&&')
-        blacklist              = kwargs.get('blacklist', [])
-        whitelist              = kwargs.get('whitelist', [])
-        clean                  = kwargs.get('clean', False)
-
-        source_file = [source_command, filename]
-        source_file.extend(args)
-        source_file = ' '.join(source_file)
-
-        dump_cmd = 'import os, json; print(json.dumps(dict(os.environ)))'
-        dump_environment = 'python -c "{0}"'.format(dump_cmd)
-
-        # Construct the command that will be executed
-        command = [
-            shell,
-            shell_options,
-            ' '.join([
-                source_file, suppress_output,
-                concatenate_on_success, dump_environment,
-            ]),
-        ]
-
-        # Try to source the file
-        proc = subprocess.Popen(
-            command, stdout=subprocess.PIPE, env=os.environ)
-        proc.wait()
-
-        if proc.returncode != 0:
-            msg = 'Sourcing file {0} returned a non-zero exit code'.format(
-                filename)
-            raise RuntimeError(msg)
-
-        output = ''.join([line.decode('utf-8') for line in proc.stdout])
-
-        # Construct dictionaries of the environment before and after
-        # sourcing the file, so that we can diff them.
-        env_before = dict(os.environ)
-        env_after = json.loads(output)
-
-        # If we're in python2, convert to str objects instead of unicode
-        # like json gives us.  We can't put unicode in os.environ anyway.
-        if sys.version_info[0] < 3:
-            env_after = dict((k.encode('utf-8'), v.encode('utf-8'))
-                             for k, v in env_after.items())
+        # Prepare a whitelist and a blacklist of environment variable names
+        blacklist = kwargs.get('blacklist', [])
+        whitelist = kwargs.get('whitelist', [])
+        clean = kwargs.get('clean', False)
 
         # Other variables unrelated to sourcing a file
-        blacklist.extend(['SHLVL', '_', 'PWD', 'OLDPWD', 'PS2'])
-
-        def set_intersection(fullset, *args):
-            # A set intersection using string literals and regexs
-            meta = '[' + re.escape('[$()*?[]^{|}') + ']'
-            subset = fullset & set(args)   # As literal
-            for name in args:
-                if re.search(meta, name):
-                    pattern = re.compile(name)
-                    for k in fullset:
-                        if re.match(pattern, k):
-                            subset.add(k)
-            return subset
-
-        for d in env_after, env_before:
-            # Retain (whitelist) has priority over prune (blacklist)
-            prune = set_intersection(set(d), *blacklist)
-            prune -= set_intersection(prune, *whitelist)
-            for k in prune:
-                d.pop(k, None)
+        blacklist.extend([
+            # Bash internals
+            'SHLVL', '_', 'PWD', 'OLDPWD', 'PS1', 'PS2', 'ENV',
+            # Environment modules v4
+            'LOADEDMODULES', '_LMFILES_', 'BASH_FUNC_module()', 'MODULEPATH',
+            'MODULES_(.*)', r'(\w*)_mod(quar|share)'
+        ])
+
+        # Compute the environments before and after sourcing
+        before = sanitize(
+            dict(os.environ), blacklist=blacklist, whitelist=whitelist
+        )
+        file_and_args = (filename,) + arguments
+        after = sanitize(
+            environment_after_sourcing_files(file_and_args, **kwargs),
+            blacklist=blacklist, whitelist=whitelist
+        )
+
+        # Delegate to the other factory
+        return EnvironmentModifications.from_environment_diff(
+            before, after, clean
+        )
 
+    @staticmethod
+    def from_environment_diff(before, after, clean=False):
+        """Constructs an instance of a
+        :py:class:`spack.util.environment.EnvironmentModifications` object
+        from the diff of two dictionaries.
+
+        Args:
+            before (dict): environment before the modifications are applied
+            after (dict): environment after the modifications are applied
+            clean (bool): in addition to removing empty entries, also remove
+                duplicate entries
+        """
         # Fill the EnvironmentModifications instance
         env = EnvironmentModifications()
-
         # New variables
-        new_variables = list(set(env_after) - set(env_before))
+        new_variables = list(set(after) - set(before))
         # Variables that have been unset
-        unset_variables = list(set(env_before) - set(env_after))
+        unset_variables = list(set(before) - set(after))
         # Variables that have been modified
-        common_variables = set(env_before).intersection(set(env_after))
-
+        common_variables = set(before).intersection(set(after))
         modified_variables = [x for x in common_variables
-                              if env_before[x] != env_after[x]]
-
+                              if before[x] != after[x]]
         # Consistent output order - looks nicer, easier comparison...
         new_variables.sort()
         unset_variables.sort()
@@ -616,21 +594,21 @@ class EnvironmentModifications(object):
         # Assume that variables with 'PATH' in the name or that contain
         # separators like ':' or ';' are more likely to be paths
         for x in new_variables:
-            sep = return_separator_if_any(env_after[x])
+            sep = return_separator_if_any(after[x])
             if sep:
-                env.prepend_path(x, env_after[x], separator=sep)
+                env.prepend_path(x, after[x], separator=sep)
             elif 'PATH' in x:
-                env.prepend_path(x, env_after[x])
+                env.prepend_path(x, after[x])
             else:
                 # We just need to set the variable to the new value
-                env.set(x, env_after[x])
+                env.set(x, after[x])
 
         for x in unset_variables:
             env.unset(x)
 
         for x in modified_variables:
-            before = env_before[x]
-            after = env_after[x]
+            before = before[x]
+            after = after[x]
             sep = return_separator_if_any(before, after)
             if sep:
                 before_list = before.split(sep)
@@ -844,3 +822,108 @@ def preserve_environment(*variables):
             msg += ' {0} was set to "{1}", will be unset'
             tty.debug(msg.format(var, os.environ[var]))
             del os.environ[var]
+
+
+def environment_after_sourcing_files(*files, **kwargs):
+    """Returns a dictionary with the environment that one would have
+    after sourcing the files passed as argument.
+
+    Args:
+        *files: each item can either be a string containing the path
+            of the file to be sourced or a sequence, where the first element
+            is the file to be sourced and the remaining are arguments to be
+            passed to the command line
+
+    Keyword Args:
+        env (dict): the initial environment (default: current environment)
+        shell (str): the shell to use (default: ``/bin/bash``)
+        shell_options (str): options passed to the shell (default: ``-c``)
+        source_command (str): the command to run (default: ``source``)
+        suppress_output (str): redirect used to suppress output of command
+            (default: ``&> /dev/null``)
+        concatenate_on_success (str): operator used to execute a command
+            only when the previous command succeeds (default: ``&&``)
+    """
+    # Set the shell executable that will be used to source files
+    shell_cmd = kwargs.get('shell', '/bin/bash')
+    shell_options = kwargs.get('shell_options', '-c')
+    source_command = kwargs.get('source_command', 'source')
+    suppress_output = kwargs.get('suppress_output', '&> /dev/null')
+    concatenate_on_success = kwargs.get('concatenate_on_success', '&&')
+
+    shell = executable.Executable(' '.join([shell_cmd, shell_options]))
+
+    def _source_single_file(file_and_args, environment):
+        source_file = [source_command]
+        source_file.extend(x for x in file_and_args)
+        source_file = ' '.join(source_file)
+
+        dump_cmd = 'import os, json; print(json.dumps(dict(os.environ)))'
+        dump_environment = 'python -c "{0}"'.format(dump_cmd)
+
+        # Try to source the file
+        source_file_arguments = ' '.join([
+            source_file, suppress_output,
+            concatenate_on_success, dump_environment,
+        ])
+        output = shell(source_file_arguments, output=str, env=environment)
+        environment = json.loads(output)
+
+        # If we're in python2, convert to str objects instead of unicode
+        # like json gives us.  We can't put unicode in os.environ anyway.
+        if sys.version_info[0] < 3:
+            environment = dict(
+                (k.encode('utf-8'), v.encode('utf-8'))
+                for k, v in environment.items()
+            )
+
+        return environment
+
+    current_environment = kwargs.get('env', dict(os.environ))
+    for f in files:
+        # Normalize the input to the helper function
+        if isinstance(f, six.string_types):
+            f = [f]
+
+        current_environment = _source_single_file(
+            f, environment=current_environment
+        )
+
+    return current_environment
+
+
+def sanitize(environment, blacklist, whitelist):
+    """Returns a copy of the input dictionary where all the keys that
+    match a blacklist pattern and don't match a whitelist pattern are
+    removed.
+
+    Args:
+        environment (dict): input dictionary
+        blacklist (list of str): literals or regex patterns to be
+            blacklisted
+        whitelist (list of str): literals or regex patterns to be
+            whitelisted
+    """
+
+    def set_intersection(fullset, *args):
+        # A set intersection using string literals and regexs
+        meta = '[' + re.escape('[$()*?[]^{|}') + ']'
+        subset = fullset & set(args)  # As literal
+        for name in args:
+            if re.search(meta, name):
+                pattern = re.compile(name)
+                for k in fullset:
+                    if re.match(pattern, k):
+                        subset.add(k)
+        return subset
+
+    # Don't modify input, make a copy instead
+    environment = dict(environment)
+
+    # Retain (whitelist) has priority over prune (blacklist)
+    prune = set_intersection(set(environment), *blacklist)
+    prune -= set_intersection(prune, *whitelist)
+    for k in prune:
+        environment.pop(k, None)
+
+    return environment
-- 
cgit v1.2.3-70-g09d2