summary | refs | log | tree | commit | diff
path: root/lib/spack/spack/hooks/sbang.py
diff options
context:
space:
mode:
Diffstat (limited to 'lib/spack/spack/hooks/sbang.py')
-rw-r--r-- lib/spack/spack/hooks/sbang.py 142
1 files changed, 85 insertions, 57 deletions
diff --git a/lib/spack/spack/hooks/sbang.py b/lib/spack/spack/hooks/sbang.py
index 8b99ee7873..1c18c46dc6 100644
--- a/lib/spack/spack/hooks/sbang.py
+++ b/lib/spack/spack/hooks/sbang.py
@@ -6,8 +6,10 @@
import filecmp
import os
import re
+import shutil
import stat
import sys
+import tempfile
import llnl.util.filesystem as fs
import llnl.util.tty as tty
@@ -19,9 +21,14 @@ import spack.store
#: Different Linux distributions have different limits, but 127 is the
#: smallest among all modern versions.
if sys.platform == 'darwin':
- shebang_limit = 511
+ system_shebang_limit = 511
else:
- shebang_limit = 127
+ system_shebang_limit = 127
+
+#: Spack itself also limits the shebang line to at most 4KB, which should be plenty.
+spack_shebang_limit = 4096
+
+interpreter_regex = re.compile(b'#![ \t]*?([^ \t\0\n]+)')
def sbang_install_path():
@@ -29,10 +36,10 @@ def sbang_install_path():
sbang_root = str(spack.store.unpadded_root)
install_path = os.path.join(sbang_root, "bin", "sbang")
path_length = len(install_path)
- if path_length > shebang_limit:
+ if path_length > system_shebang_limit:
msg = ('Install tree root is too long. Spack cannot patch shebang lines'
' when script path length ({0}) exceeds limit ({1}).\n {2}')
- msg = msg.format(path_length, shebang_limit, install_path)
+ msg = msg.format(path_length, system_shebang_limit, install_path)
raise SbangPathError(msg)
return install_path
@@ -49,71 +56,92 @@ def sbang_shebang_line():
return '#!/bin/sh %s' % sbang_install_path()
-def shebang_too_long(path):
- """Detects whether a file has a shebang line that is too long."""
- if not os.path.isfile(path):
- return False
-
- with open(path, 'rb') as script:
- bytes = script.read(2)
- if bytes != b'#!':
- return False
-
- line = bytes + script.readline()
- return len(line) > shebang_limit
+def get_interpreter(binary_string):
+ # The interpreter may be preceded with ' ' and \t, is itself any byte that
+ # follows until the first occurrence of ' ', \t, \0, \n or end of file.
+ match = interpreter_regex.match(binary_string)
+ return None if match is None else match.group(1)
def filter_shebang(path):
- """Adds a second shebang line, using sbang, at the beginning of a file."""
- with open(path, 'rb') as original_file:
- original = original_file.read()
- if sys.version_info >= (2, 7):
- original = original.decode(encoding='UTF-8')
- else:
- original = original.decode('UTF-8')
+ """
+ Adds a second shebang line, using sbang, at the beginning of a file, if necessary.
+ Note: Spack imposes a relaxed shebang line limit, meaning that a newline or end of
+ file must occur before ``spack_shebang_limit`` bytes. If not, the file is not
+ patched.
+ """
+ with open(path, 'rb') as original:
+ # If there is no shebang, we shouldn't replace anything.
+ old_shebang_line = original.read(2)
+ if old_shebang_line != b'#!':
+ return False
- # This line will be prepended to file
- new_sbang_line = '%s\n' % sbang_shebang_line()
+ # Stop reading after b'\n'. Note that old_shebang_line includes the first b'\n'.
+ old_shebang_line += original.readline(spack_shebang_limit - 2)
- # Skip files that are already using sbang.
- if original.startswith(new_sbang_line):
- return
+ # If the shebang line is short, we don't have to do anything.
+ if len(old_shebang_line) <= system_shebang_limit:
+ return False
- # In the following, newlines have to be excluded in the regular expression
- # else any mention of "lua" in the document will lead to spurious matches.
+ # Whenever we can't find a newline within the maximum number of bytes, we will
+ # not attempt to rewrite it. In principle we could still get the interpreter if
+ # only the arguments are truncated, but note that for PHP we need the full line
+ # since we have to append `?>` to it. Since our shebang limit is already very
+ # generous, it's unlikely to happen, and it should be fine to ignore.
+ if (
+ len(old_shebang_line) == spack_shebang_limit and
+ old_shebang_line[-1] != b'\n'
+ ):
+ return False
- # Use --! instead of #! on second line for lua.
- if re.search(r'^#!(/[^/\n]*)*lua\b', original):
- original = re.sub(r'^#', '--', original)
+ # This line will be prepended to file
+ new_sbang_line = (sbang_shebang_line() + '\n').encode('utf-8')
- # Use <?php #! instead of #! on second line for php.
- if re.search(r'^#!(/[^/\n]*)*php\b', original):
- original = re.sub(r'^#', '<?php #', original) + ' ?>'
+ # Skip files that are already using sbang.
+ if old_shebang_line == new_sbang_line:
+ return
- # Use //! instead of #! on second line for node.js.
- if re.search(r'^#!(/[^/\n]*)*node\b', original):
- original = re.sub(r'^#', '//', original)
+ interpreter = get_interpreter(old_shebang_line)
- # Change non-writable files to be writable if needed.
- saved_mode = None
- if not os.access(path, os.W_OK):
- st = os.stat(path)
- saved_mode = st.st_mode
- os.chmod(path, saved_mode | stat.S_IWRITE)
+ # If there was only whitespace we don't have to do anything.
+ if not interpreter:
+ return False
- with open(path, 'wb') as new_file:
- if sys.version_info >= (2, 7):
- new_file.write(new_sbang_line.encode(encoding='UTF-8'))
- new_file.write(original.encode(encoding='UTF-8'))
+ # Store the file permissions, the patched version needs the same.
+ saved_mode = os.stat(path).st_mode
+
+ # No need to delete since we'll move it and overwrite the original.
+ patched = tempfile.NamedTemporaryFile('wb', delete=False)
+ patched.write(new_sbang_line)
+
+ # Note that in Python this does not go out of bounds even if interpreter is a
+ # short byte array.
+ # Note: if the interpreter string was encoded with UTF-16, there would have
+ # been a \0 byte between all characters of lua, node, php; meaning that it would
+ # lead to truncation of the interpreter. So we don't have to worry about weird
+ # encodings here, and just looking at bytes is justified.
+ if interpreter[-4:] == b'/lua' or interpreter[-7:] == b'/luajit':
+ # Use --! instead of #! on second line for lua.
+ patched.write(b'--!' + old_shebang_line[2:])
+ elif interpreter[-5:] == b'/node':
+ # Use //! instead of #! on second line for node.js.
+ patched.write(b'//!' + old_shebang_line[2:])
+ elif interpreter[-4:] == b'/php':
+ # Use <?php #!... ?> instead of #!... on second line for php.
+ patched.write(b'<?php ' + old_shebang_line + b' ?>')
else:
- new_file.write(new_sbang_line.encode('UTF-8'))
- new_file.write(original.encode('UTF-8'))
+ patched.write(old_shebang_line)
+
+ # After copying the remainder of the file, we can close the original
+ shutil.copyfileobj(original, patched)
- # Restore original permissions.
- if saved_mode is not None:
- os.chmod(path, saved_mode)
+ # And close the temporary file so we can move it.
+ patched.close()
- tty.debug("Patched overlong shebang in %s" % path)
+ # Overwrite original file with patched file, and keep the original mode
+ shutil.move(patched.name, path)
+ os.chmod(path, saved_mode)
+ return True
def filter_shebangs_in_directory(directory, filenames=None):
@@ -138,8 +166,8 @@ def filter_shebangs_in_directory(directory, filenames=None):
continue
# test the file for a long shebang, and filter
- if shebang_too_long(path):
- filter_shebang(path)
+ if filter_shebang(path):
+ tty.debug("Patched overlong shebang in %s" % path)
def install_sbang():