From 2fe1ecbaa2f87bc3f11f73ee41828ea5b9a6b3d5 Mon Sep 17 00:00:00 2001 From: Massimiliano Culpo Date: Tue, 16 Jul 2019 17:54:33 +0200 Subject: Ignore Modules v4 environment variables in `from_sourcing_file` (#10753) * from_sourcing_file: fixed a bug + added a few ignored variables closes #7536 Credits for this change goes to mgsternberg (original author of #7536) The new variables being ignored are specific to Modules v4. * Use Spack Executable in 'EnvironmentModifications.from_sourcing_file' Using this class avoids duplicating lower level logic to decode stdout and handle non-zero return codes * Extracted a function that returns the environment after sourcing files The logic in `EnvironmentModifications.from_sourcing_file` has been simplified by extracting a function that returns a dictionary with the environment one would have after sourcing the files passed as argument. * Further refactoring of EnvironmentModifications.from_sourcing_file Extracted a function that sanitizes a dictionary removing keys that are blacklisted, but keeping those that are whitelisted. Blacklisting and whitelisting can be done on literals or regex. Extracted a new factory that creates an instance of EnvironmentModifications from a diff of two environments. * Added unit tests * PS1 is blacklisted + more readable names for some variables --- lib/spack/spack/test/data/sourceme_unset.sh | 8 + lib/spack/spack/test/environment_modifications.py | 129 +++++++++- lib/spack/spack/util/environment.py | 291 ++++++++++++++-------- 3 files changed, 311 insertions(+), 117 deletions(-) create mode 100644 lib/spack/spack/test/data/sourceme_unset.sh diff --git a/lib/spack/spack/test/data/sourceme_unset.sh b/lib/spack/spack/test/data/sourceme_unset.sh new file mode 100644 index 0000000000..e14aeb3f89 --- /dev/null +++ b/lib/spack/spack/test/data/sourceme_unset.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +# +# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +unset UNSET_ME diff --git a/lib/spack/spack/test/environment_modifications.py b/lib/spack/spack/test/environment_modifications.py index 664a57a92a..b8bc7d3c34 100644 --- a/lib/spack/spack/test/environment_modifications.py +++ b/lib/spack/spack/test/environment_modifications.py @@ -14,6 +14,9 @@ from spack.util.environment import SetEnv, UnsetEnv from spack.util.environment import filter_system_paths, is_system_path +datadir = os.path.join(spack_root, 'lib', 'spack', 'spack', 'test', 'data') + + def test_inspect_path(tmpdir): inspections = { 'bin': ['PATH'], @@ -55,7 +58,7 @@ def test_exclude_paths_from_inspection(): @pytest.fixture() -def prepare_environment_for_tests(): +def prepare_environment_for_tests(working_env): """Sets a few dummy variables in the current environment, that will be useful for the tests below. """ @@ -67,11 +70,6 @@ def prepare_environment_for_tests(): os.environ['PATH_LIST_WITH_SYSTEM_PATHS'] \ = '/usr/include:' + os.environ['REMOVE_PATH_LIST'] os.environ['PATH_LIST_WITH_DUPLICATES'] = os.environ['REMOVE_PATH_LIST'] - yield - for x in ('UNSET_ME', 'EMPTY_PATH_LIST', 'PATH_LIST', 'REMOVE_PATH_LIST', - 'PATH_LIST_WITH_SYSTEM_PATHS', 'PATH_LIST_WITH_DUPLICATES'): - if x in os.environ: - del os.environ[x] @pytest.fixture @@ -109,19 +107,13 @@ def miscellaneous_paths(): @pytest.fixture def files_to_be_sourced(): """Returns a list of files to be sourced""" - datadir = os.path.join( - spack_root, 'lib', 'spack', 'spack', 'test', 'data' - ) - - files = [ + return [ os.path.join(datadir, 'sourceme_first.sh'), os.path.join(datadir, 'sourceme_second.sh'), os.path.join(datadir, 'sourceme_parameters.sh'), os.path.join(datadir, 'sourceme_unicode.sh') ] - return files - def test_set(env): """Tests setting values in the environment.""" @@ -322,8 +314,119 @@ def test_preserve_environment(prepare_environment_for_tests): assert os.environ['PATH_LIST'] == '/path/second:/path/third' +@pytest.mark.parametrize('files,expected,deleted', [ + # Sets two variables + ((os.path.join(datadir, 'sourceme_first.sh'),), + {'NEW_VAR': 'new', 'UNSET_ME': 'overridden'}, []), + # Check if we can set a variable to different values depending + # on command line parameters + ((os.path.join(datadir, 'sourceme_parameters.sh'),), + {'FOO': 'default'}, []), + (([os.path.join(datadir, 'sourceme_parameters.sh'), 'intel64'],), + {'FOO': 'intel64'}, []), + # Check unsetting variables + ((os.path.join(datadir, 'sourceme_second.sh'),), + {'PATH_LIST': '/path/first:/path/second:/path/fourth'}, + ['EMPTY_PATH_LIST']), + # Check that order of sourcing matters + ((os.path.join(datadir, 'sourceme_unset.sh'), + os.path.join(datadir, 'sourceme_first.sh')), + {'NEW_VAR': 'new', 'UNSET_ME': 'overridden'}, []), + ((os.path.join(datadir, 'sourceme_first.sh'), + os.path.join(datadir, 'sourceme_unset.sh')), + {'NEW_VAR': 'new'}, ['UNSET_ME']), + +]) +@pytest.mark.usefixtures('prepare_environment_for_tests') +def test_environment_from_sourcing_files(files, expected, deleted): + + env = environment.environment_after_sourcing_files(*files) + + # Test that variables that have been modified are still there and contain + # the expected output + for name, value in expected.items(): + assert name in env + assert value in env[name] + + # Test that variables that have been unset are not there + for name in deleted: + assert name not in env + + def test_clear(env): env.set('A', 'dummy value') assert len(env) > 0 env.clear() assert len(env) == 0 + + +@pytest.mark.parametrize('env,blacklist,whitelist', [ + # Check we can blacklist a literal + ({'SHLVL': '1'}, ['SHLVL'], []), + # Check whitelist takes precedence + ({'SHLVL': '1'}, ['SHLVL'], ['SHLVL']), +]) +def test_sanitize_literals(env, blacklist, whitelist): + + after = environment.sanitize(env, blacklist, whitelist) + + # Check that all the whitelisted variables are there + assert all(x in after for x in whitelist) + + # Check that the blacklisted variables that are not + # whitelisted are there + blacklist = list(set(blacklist) - set(whitelist)) + assert all(x not in after for x in blacklist) + + +@pytest.mark.parametrize('env,blacklist,whitelist,expected,deleted', [ + # Check we can blacklist using a regex + ({'SHLVL': '1'}, ['SH.*'], [], [], ['SHLVL']), + # Check we can whitelist using a regex + ({'SHLVL': '1'}, ['SH.*'], ['SH.*'], ['SHLVL'], []), + # Check regex to blacklist Modules v4 related vars + ({'MODULES_LMALTNAME': '1', 'MODULES_LMCONFLICT': '2'}, + ['MODULES_(.*)'], [], [], ['MODULES_LMALTNAME', 'MODULES_LMCONFLICT']), + ({'A_modquar': '1', 'b_modquar': '2', 'C_modshare': '3'}, + [r'(\w*)_mod(quar|share)'], [], [], + ['A_modquar', 'b_modquar', 'C_modshare']), +]) +def test_sanitize_regex(env, blacklist, whitelist, expected, deleted): + + after = environment.sanitize(env, blacklist, whitelist) + + assert all(x in after for x in expected) + assert all(x not in after for x in deleted) + + +@pytest.mark.parametrize('before,after,search_list', [ + # Set environment variables + ({}, {'FOO': 'foo'}, [environment.SetEnv('FOO', 'foo')]), + # Unset environment variables + ({'FOO': 'foo'}, {}, [environment.UnsetEnv('FOO')]), + # Append paths to an environment variable + ({'FOO_PATH': '/a/path'}, {'FOO_PATH': '/a/path:/b/path'}, + [environment.AppendPath('FOO_PATH', '/b/path')]), + ({}, {'FOO_PATH': '/a/path:/b/path'}, [ + environment.AppendPath('FOO_PATH', '/a/path:/b/path') + ]), + ({'FOO_PATH': '/a/path:/b/path'}, {'FOO_PATH': '/b/path'}, [ + environment.RemovePath('FOO_PATH', '/a/path') + ]), + ({'FOO_PATH': '/a/path:/b/path'}, {'FOO_PATH': '/a/path:/c/path'}, [ + environment.RemovePath('FOO_PATH', '/b/path'), + environment.AppendPath('FOO_PATH', '/c/path') + ]), + ({'FOO_PATH': '/a/path:/b/path'}, {'FOO_PATH': '/c/path:/a/path'}, [ + environment.RemovePath('FOO_PATH', '/b/path'), + environment.PrependPath('FOO_PATH', '/c/path') + ]) +]) +def test_from_environment_diff(before, after, search_list): + + mod = environment.EnvironmentModifications.from_environment_diff( + before, after + ) + + for item in search_list: + assert item in mod diff --git a/lib/spack/spack/util/environment.py b/lib/spack/spack/util/environment.py index ef2d2a23b6..7ad80a7eab 100644 --- a/lib/spack/spack/util/environment.py +++ b/lib/spack/spack/util/environment.py @@ -12,9 +12,11 @@ import os import re import sys import os.path -import subprocess + +import six import llnl.util.tty as tty +import spack.util.executable as executable from llnl.util.lang import dedupe @@ -171,6 +173,11 @@ class NameModifier(object): self.args.update(kwargs) + def __eq__(self, other): + if not isinstance(other, NameModifier): + return False + return self.name == other.name + def update_args(self, **kwargs): self.__dict__.update(kwargs) self.args.update(kwargs) @@ -185,6 +192,13 @@ class NameValueModifier(object): self.args = {'name': name, 'value': value, 'separator': self.separator} self.args.update(kwargs) + def __eq__(self, other): + if not isinstance(other, NameValueModifier): + return False + return self.name == other.name and \ + self.value == other.value and \ + self.separator == other.separator + def update_args(self, **kwargs): self.__dict__.update(kwargs) self.args.update(kwargs) @@ -483,122 +497,86 @@ class EnvironmentModifications(object): return cmds @staticmethod - def from_sourcing_file(filename, *args, **kwargs): - """Returns modifications that would be made by sourcing a file. - - Parameters: - filename (str): The file to source - *args (list of str): Arguments to pass on the command line - - Keyword Arguments: - shell (str): The shell to use (default: ``bash``) - shell_options (str): Options passed to the shell (default: ``-c``) - source_command (str): The command to run (default: ``source``) - suppress_output (str): Redirect used to suppress output of command + def from_sourcing_file(filename, *arguments, **kwargs): + """Constructs an instance of a + :py:class:`spack.util.environment.EnvironmentModifications` object + that has the same effect as sourcing a file. + + Args: + filename (str): the file to be sourced + *arguments (list of str): arguments to pass on the command line + + Keyword Args: + shell (str): the shell to use (default: ``bash``) + shell_options (str): options passed to the shell (default: ``-c``) + source_command (str): the command to run (default: ``source``) + suppress_output (str): redirect used to suppress output of command (default: ``&> /dev/null``) - concatenate_on_success (str): Operator used to execute a command + concatenate_on_success (str): operator used to execute a command only when the previous command succeeds (default: ``&&``) - blacklist ([str or re]): Ignore any modifications of these + blacklist ([str or re]): ignore any modifications of these variables (default: []) - whitelist ([str or re]): Always respect modifications of these - variables (default: []). Has precedence over blacklist. - clean (bool): In addition to removing empty entries, + whitelist ([str or re]): always respect modifications of these + variables (default: []). has precedence over blacklist. + clean (bool): in addition to removing empty entries, also remove duplicate entries (default: False). - - Returns: - EnvironmentModifications: an object that, if executed, has - the same effect on the environment as sourcing the file """ # Check if the file actually exists if not os.path.isfile(filename): msg = 'Trying to source non-existing file: {0}'.format(filename) raise RuntimeError(msg) - # Kwargs parsing and default values - shell = kwargs.get('shell', '/bin/bash') - shell_options = kwargs.get('shell_options', '-c') - source_command = kwargs.get('source_command', 'source') - suppress_output = kwargs.get('suppress_output', '&> /dev/null') - concatenate_on_success = kwargs.get('concatenate_on_success', '&&') - blacklist = kwargs.get('blacklist', []) - whitelist = kwargs.get('whitelist', []) - clean = kwargs.get('clean', False) - - source_file = [source_command, filename] - source_file.extend(args) - source_file = ' '.join(source_file) - - dump_cmd = 'import os, json; print(json.dumps(dict(os.environ)))' - dump_environment = 'python -c "{0}"'.format(dump_cmd) - - # Construct the command that will be executed - command = [ - shell, - shell_options, - ' '.join([ - source_file, suppress_output, - concatenate_on_success, dump_environment, - ]), - ] - - # Try to source the file - proc = subprocess.Popen( - command, stdout=subprocess.PIPE, env=os.environ) - proc.wait() - - if proc.returncode != 0: - msg = 'Sourcing file {0} returned a non-zero exit code'.format( - filename) - raise RuntimeError(msg) - - output = ''.join([line.decode('utf-8') for line in proc.stdout]) - - # Construct dictionaries of the environment before and after - # sourcing the file, so that we can diff them. - env_before = dict(os.environ) - env_after = json.loads(output) - - # If we're in python2, convert to str objects instead of unicode - # like json gives us. We can't put unicode in os.environ anyway. - if sys.version_info[0] < 3: - env_after = dict((k.encode('utf-8'), v.encode('utf-8')) - for k, v in env_after.items()) + # Prepare a whitelist and a blacklist of environment variable names + blacklist = kwargs.get('blacklist', []) + whitelist = kwargs.get('whitelist', []) + clean = kwargs.get('clean', False) # Other variables unrelated to sourcing a file - blacklist.extend(['SHLVL', '_', 'PWD', 'OLDPWD', 'PS2']) - - def set_intersection(fullset, *args): - # A set intersection using string literals and regexs - meta = '[' + re.escape('[$()*?[]^{|}') + ']' - subset = fullset & set(args) # As literal - for name in args: - if re.search(meta, name): - pattern = re.compile(name) - for k in fullset: - if re.match(pattern, k): - subset.add(k) - return subset - - for d in env_after, env_before: - # Retain (whitelist) has priority over prune (blacklist) - prune = set_intersection(set(d), *blacklist) - prune -= set_intersection(prune, *whitelist) - for k in prune: - d.pop(k, None) + blacklist.extend([ + # Bash internals + 'SHLVL', '_', 'PWD', 'OLDPWD', 'PS1', 'PS2', 'ENV', + # Environment modules v4 + 'LOADEDMODULES', '_LMFILES_', 'BASH_FUNC_module()', 'MODULEPATH', + 'MODULES_(.*)', r'(\w*)_mod(quar|share)' + ]) + + # Compute the environments before and after sourcing + before = sanitize( + dict(os.environ), blacklist=blacklist, whitelist=whitelist + ) + file_and_args = (filename,) + arguments + after = sanitize( + environment_after_sourcing_files(file_and_args, **kwargs), + blacklist=blacklist, whitelist=whitelist + ) + + # Delegate to the other factory + return EnvironmentModifications.from_environment_diff( + before, after, clean + ) + @staticmethod + def from_environment_diff(before, after, clean=False): + """Constructs an instance of a + :py:class:`spack.util.environment.EnvironmentModifications` object + from the diff of two dictionaries. + + Args: + before (dict): environment before the modifications are applied + after (dict): environment after the modifications are applied + clean (bool): in addition to removing empty entries, also remove + duplicate entries + """ # Fill the EnvironmentModifications instance env = EnvironmentModifications() - # New variables - new_variables = list(set(env_after) - set(env_before)) + new_variables = list(set(after) - set(before)) # Variables that have been unset - unset_variables = list(set(env_before) - set(env_after)) + unset_variables = list(set(before) - set(after)) # Variables that have been modified - common_variables = set(env_before).intersection(set(env_after)) - + common_variables = set(before).intersection(set(after)) modified_variables = [x for x in common_variables - if env_before[x] != env_after[x]] - + if before[x] != after[x]] # Consistent output order - looks nicer, easier comparison... new_variables.sort() unset_variables.sort() @@ -616,21 +594,21 @@ class EnvironmentModifications(object): # Assume that variables with 'PATH' in the name or that contain # separators like ':' or ';' are more likely to be paths for x in new_variables: - sep = return_separator_if_any(env_after[x]) + sep = return_separator_if_any(after[x]) if sep: - env.prepend_path(x, env_after[x], separator=sep) + env.prepend_path(x, after[x], separator=sep) elif 'PATH' in x: - env.prepend_path(x, env_after[x]) + env.prepend_path(x, after[x]) else: # We just need to set the variable to the new value - env.set(x, env_after[x]) + env.set(x, after[x]) for x in unset_variables: env.unset(x) for x in modified_variables: - before = env_before[x] - after = env_after[x] + before = before[x] + after = after[x] sep = return_separator_if_any(before, after) if sep: before_list = before.split(sep) @@ -844,3 +822,108 @@ def preserve_environment(*variables): msg += ' {0} was set to "{1}", will be unset' tty.debug(msg.format(var, os.environ[var])) del os.environ[var] + + +def environment_after_sourcing_files(*files, **kwargs): + """Returns a dictionary with the environment that one would have + after sourcing the files passed as argument. + + Args: + *files: each item can either be a string containing the path + of the file to be sourced or a sequence, where the first element + is the file to be sourced and the remaining are arguments to be + passed to the command line + + Keyword Args: + env (dict): the initial environment (default: current environment) + shell (str): the shell to use (default: ``/bin/bash``) + shell_options (str): options passed to the shell (default: ``-c``) + source_command (str): the command to run (default: ``source``) + suppress_output (str): redirect used to suppress output of command + (default: ``&> /dev/null``) + concatenate_on_success (str): operator used to execute a command + only when the previous command succeeds (default: ``&&``) + """ + # Set the shell executable that will be used to source files + shell_cmd = kwargs.get('shell', '/bin/bash') + shell_options = kwargs.get('shell_options', '-c') + source_command = kwargs.get('source_command', 'source') + suppress_output = kwargs.get('suppress_output', '&> /dev/null') + concatenate_on_success = kwargs.get('concatenate_on_success', '&&') + + shell = executable.Executable(' '.join([shell_cmd, shell_options])) + + def _source_single_file(file_and_args, environment): + source_file = [source_command] + source_file.extend(x for x in file_and_args) + source_file = ' '.join(source_file) + + dump_cmd = 'import os, json; print(json.dumps(dict(os.environ)))' + dump_environment = 'python -c "{0}"'.format(dump_cmd) + + # Try to source the file + source_file_arguments = ' '.join([ + source_file, suppress_output, + concatenate_on_success, dump_environment, + ]) + output = shell(source_file_arguments, output=str, env=environment) + environment = json.loads(output) + + # If we're in python2, convert to str objects instead of unicode + # like json gives us. We can't put unicode in os.environ anyway. + if sys.version_info[0] < 3: + environment = dict( + (k.encode('utf-8'), v.encode('utf-8')) + for k, v in environment.items() + ) + + return environment + + current_environment = kwargs.get('env', dict(os.environ)) + for f in files: + # Normalize the input to the helper function + if isinstance(f, six.string_types): + f = [f] + + current_environment = _source_single_file( + f, environment=current_environment + ) + + return current_environment + + +def sanitize(environment, blacklist, whitelist): + """Returns a copy of the input dictionary where all the keys that + match a blacklist pattern and don't match a whitelist pattern are + removed. + + Args: + environment (dict): input dictionary + blacklist (list of str): literals or regex patterns to be + blacklisted + whitelist (list of str): literals or regex patterns to be + whitelisted + """ + + def set_intersection(fullset, *args): + # A set intersection using string literals and regexs + meta = '[' + re.escape('[$()*?[]^{|}') + ']' + subset = fullset & set(args) # As literal + for name in args: + if re.search(meta, name): + pattern = re.compile(name) + for k in fullset: + if re.match(pattern, k): + subset.add(k) + return subset + + # Don't modify input, make a copy instead + environment = dict(environment) + + # Retain (whitelist) has priority over prune (blacklist) + prune = set_intersection(set(environment), *blacklist) + prune -= set_intersection(prune, *whitelist) + for k in prune: + environment.pop(k, None) + + return environment -- cgit v1.2.3-70-g09d2