From 35dd929651c1a610a767e6f6249da514a44eb653 Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Sun, 23 Dec 2018 23:59:58 -0800 Subject: bugfix: handle unicode properly in spack.util.executable (#10186) - When returning string output, use text_type and decode utf-8 in Python 2 instead of using `str` - This properly handles unicode, whereas before we would pass bad strings to colify in `spack blame` when reading git output - add a test that round-trips some unicode through an Executable object --- lib/spack/spack/test/util/executable.py | 34 +++++++++++++++++++++++++++++++++ lib/spack/spack/util/executable.py | 21 +++----------------- 2 files changed, 37 insertions(+), 18 deletions(-) create mode 100644 lib/spack/spack/test/util/executable.py diff --git a/lib/spack/spack/test/util/executable.py b/lib/spack/spack/test/util/executable.py new file mode 100644 index 0000000000..6b4fd2288a --- /dev/null +++ b/lib/spack/spack/test/util/executable.py @@ -0,0 +1,34 @@ +# Copyright 2013-2018 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +import sys + +import llnl.util.filesystem as fs + +import spack.util.executable as ex + + +def test_read_unicode(tmpdir): + script_name = 'print_unicode.py' + + with tmpdir.as_cwd(): + + # make a script that prints some unicode + with open(script_name, 'w') as f: + f.write('''#!{0} +from __future__ import print_function +import sys +if sys.version_info < (3, 0, 0): + reload(sys) + sys.setdefaultencoding('utf8') +print(u'\\xc3') +'''.format(sys.executable)) + + # make it executable + fs.set_executable(script_name) + + # read the unicode back in and see whether things work + script = ex.Executable('./%s' % script_name) + assert u'\xc3' == script(output=str).strip() diff --git a/lib/spack/spack/util/executable.py b/lib/spack/spack/util/executable.py index e1d7b49016..3b544f9206 100644 --- a/lib/spack/spack/util/executable.py +++ b/lib/spack/spack/util/executable.py @@ -6,8 +6,7 @@ import os import re import subprocess -from six import string_types -import sys +from six import string_types, text_type import llnl.util.tty as tty @@ -171,9 +170,9 @@ class Executable(object): if output is str or error is str: result = '' if output is str: - result += to_str(out) + result += text_type(out.decode('utf-8')) if error is str: - result += to_str(err) + result += text_type(err.decode('utf-8')) rc = self.returncode = proc.returncode if fail_on_error and rc != 0 and (rc not in ignore_errors): @@ -224,20 +223,6 @@ class Executable(object): return ' '.join(self.exe) -def to_str(content): - """Produce a str type from the content of a process stream obtained with - Popen.communicate. - """ - # Prior to python3, Popen.communicate returns a str type. For python3 it - # returns a bytes type. In the case of python3 we decode the - # byte string to produce a str type. This will generate junk if the - # encoding is not UTF-8 (which includes ASCII). - if sys.version_info < (3, 0, 0): - return content - else: - return content.decode('utf-8') - - def which(*args, **kwargs): """Finds an executable in the path like command-line which. -- cgit v1.2.3-70-g09d2