summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHarmen Stoppels <harmenstoppels@gmail.com>2021-10-01 20:40:48 +0200
committerGitHub <noreply@github.com>2021-10-01 11:40:48 -0700
commit18760de972c1af92a88fecc4b36a4ecf001821cf (patch)
tree6646ce55065da71fc6398a56bb91375882001bd6
parentdf590bb6eed2384597a3f338ee9c3ab3a2730ab6 (diff)
downloadspack-18760de972c1af92a88fecc4b36a4ecf001821cf.tar.gz
spack-18760de972c1af92a88fecc4b36a4ecf001821cf.tar.bz2
spack-18760de972c1af92a88fecc4b36a4ecf001821cf.tar.xz
spack-18760de972c1af92a88fecc4b36a4ecf001821cf.zip
Spack install: handle failed restore of backup (#25647)
Spack has logic to preserve an installation prefix when it is being overwritten: if the new install fails, the old files are restored. This PR adds error handling for when this backup restoration fails (i.e. the new install fails, and then some unexpected error prevents restoration from the backup).
-rw-r--r--lib/spack/llnl/util/filesystem.py40
-rw-r--r--lib/spack/spack/installer.py100
-rw-r--r--lib/spack/spack/test/installer.py95
3 files changed, 200 insertions, 35 deletions
diff --git a/lib/spack/llnl/util/filesystem.py b/lib/spack/llnl/util/filesystem.py
index e44f8c608f..f3c2ee5ab1 100644
--- a/lib/spack/llnl/util/filesystem.py
+++ b/lib/spack/llnl/util/filesystem.py
@@ -656,6 +656,12 @@ def working_dir(dirname, **kwargs):
os.chdir(orig_dir)
+class CouldNotRestoreDirectoryBackup(RuntimeError):
+ def __init__(self, inner_exception, outer_exception):
+ self.inner_exception = inner_exception
+ self.outer_exception = outer_exception
+
+
@contextmanager
def replace_directory_transaction(directory_name, tmp_root=None):
"""Moves a directory to a temporary space. If the operations executed
@@ -683,29 +689,33 @@ def replace_directory_transaction(directory_name, tmp_root=None):
assert os.path.isabs(tmp_root)
tmp_dir = tempfile.mkdtemp(dir=tmp_root)
- tty.debug('TEMPORARY DIRECTORY CREATED [{0}]'.format(tmp_dir))
+ tty.debug('Temporary directory created [{0}]'.format(tmp_dir))
shutil.move(src=directory_name, dst=tmp_dir)
- tty.debug('DIRECTORY MOVED [src={0}, dest={1}]'.format(
- directory_name, tmp_dir
- ))
+ tty.debug('Directory moved [src={0}, dest={1}]'.format(directory_name, tmp_dir))
try:
yield tmp_dir
- except (Exception, KeyboardInterrupt, SystemExit):
- # Delete what was there, before copying back the original content
- if os.path.exists(directory_name):
- shutil.rmtree(directory_name)
- shutil.move(
- src=os.path.join(tmp_dir, directory_basename),
- dst=os.path.dirname(directory_name)
- )
- tty.debug('DIRECTORY RECOVERED [{0}]'.format(directory_name))
+ except (Exception, KeyboardInterrupt, SystemExit) as inner_exception:
+ # Try to recover the original directory, if this fails, raise a
+ # composite exception.
+ try:
+ # Delete what was there, before copying back the original content
+ if os.path.exists(directory_name):
+ shutil.rmtree(directory_name)
+ shutil.move(
+ src=os.path.join(tmp_dir, directory_basename),
+ dst=os.path.dirname(directory_name)
+ )
+ except Exception as outer_exception:
+ raise CouldNotRestoreDirectoryBackup(inner_exception, outer_exception)
+
+ tty.debug('Directory recovered [{0}]'.format(directory_name))
raise
else:
# Otherwise delete the temporary directory
- shutil.rmtree(tmp_dir)
- tty.debug('TEMPORARY DIRECTORY DELETED [{0}]'.format(tmp_dir))
+ shutil.rmtree(tmp_dir, ignore_errors=True)
+ tty.debug('Temporary directory deleted [{0}]'.format(tmp_dir))
def hash_directory(directory, ignore=[]):
diff --git a/lib/spack/spack/installer.py b/lib/spack/spack/installer.py
index d6a0bcabd7..2f1544ee04 100644
--- a/lib/spack/spack/installer.py
+++ b/lib/spack/spack/installer.py
@@ -85,6 +85,15 @@ STATUS_DEQUEUED = 'dequeued'
STATUS_REMOVED = 'removed'
+class InstallAction(object):
+ #: Don't perform an install
+ NONE = 0
+ #: Do a standard install
+ INSTALL = 1
+ #: Do an overwrite install
+ OVERWRITE = 2
+
+
def _check_last_phase(pkg):
"""
Ensures the specified package has a valid last phase before proceeding
@@ -1418,6 +1427,42 @@ class PackageInstaller(object):
for dependent_id in dependents.difference(task.dependents):
task.add_dependent(dependent_id)
+ def _install_action(self, task):
+ """
+ Determine whether the installation should be overwritten (if it already
+ exists) or skipped (if has been handled by another process).
+
+ If the package has not been installed yet, this will indicate that the
+ installation should proceed as normal (i.e. no need to transactionally
+ preserve the old prefix).
+ """
+ # If we don't have to overwrite, do a normal install
+ if task.pkg.spec.dag_hash() not in task.request.overwrite:
+ return InstallAction.INSTALL
+
+ # If it's not installed, do a normal install as well
+ rec, installed = self._check_db(task.pkg.spec)
+ if not installed:
+ return InstallAction.INSTALL
+
+ # Ensure install_tree projections have not changed.
+ assert task.pkg.prefix == rec.path
+
+ # If another process has overwritten this, we shouldn't install at all
+ if rec.installation_time >= task.request.overwrite_time:
+ return InstallAction.NONE
+
+ # If the install prefix is missing, warn about it, and proceed with
+ # normal install.
+ if not os.path.exists(task.pkg.prefix):
+ tty.debug("Missing installation to overwrite")
+ return InstallAction.INSTALL
+
+ # Otherwise, do an actual overwrite install. We backup the original
+ # install directory, put the old prefix
+ # back on failure
+ return InstallAction.OVERWRITE
+
def install(self):
"""
Install the requested package(s) and or associated dependencies.
@@ -1561,26 +1606,12 @@ class PackageInstaller(object):
# Proceed with the installation since we have an exclusive write
# lock on the package.
try:
- if pkg.spec.dag_hash() in task.request.overwrite:
- rec, _ = self._check_db(pkg.spec)
- if rec and rec.installed:
- if rec.installation_time < task.request.overwrite_time:
- # If it's actually overwriting, do a fs transaction
- if os.path.exists(rec.path):
- with fs.replace_directory_transaction(
- rec.path):
- # fs transaction will put the old prefix
- # back on failure, so make sure to keep it.
- keep_prefix = True
- self._install_task(task)
- else:
- tty.debug("Missing installation to overwrite")
- self._install_task(task)
- else:
- # overwriting nothing
- self._install_task(task)
- else:
+ action = self._install_action(task)
+
+ if action == InstallAction.INSTALL:
self._install_task(task)
+ elif action == InstallAction.OVERWRITE:
+ OverwriteInstall(self, spack.store.db, task).install()
self._update_installed(task)
@@ -1631,7 +1662,7 @@ class PackageInstaller(object):
finally:
# Remove the install prefix if anything went wrong during
# install.
- if not keep_prefix:
+ if not keep_prefix and not action == InstallAction.OVERWRITE:
pkg.remove_prefix()
# The subprocess *may* have removed the build stage. Mark it
@@ -1883,6 +1914,35 @@ def build_process(pkg, install_args):
return installer.run()
+class OverwriteInstall(object):
+ def __init__(self, installer, database, task, tmp_root=None):
+ self.installer = installer
+ self.database = database
+ self.task = task
+ self.tmp_root = tmp_root
+
+ def install(self):
+ """
+ Try to run the install task overwriting the package prefix.
+ If this fails, try to recover the original install prefix. If that fails
+ too, mark the spec as uninstalled. This function always the original
+ install error if installation fails.
+ """
+ try:
+ with fs.replace_directory_transaction(self.task.pkg.prefix, self.tmp_root):
+ self.installer._install_task(self.task)
+ except fs.CouldNotRestoreDirectoryBackup as e:
+ self.database.remove(self.task.pkg.spec)
+ tty.error('Recovery of install dir of {0} failed due to '
+ '{1}: {2}. The spec is now uninstalled.'.format(
+ self.task.pkg.name,
+ e.outer_exception.__class__.__name__,
+ str(e.outer_exception)))
+
+ # Unwrap the actual installation exception.
+ raise e.inner_exception
+
+
class BuildTask(object):
"""Class for representing the build task for a package."""
diff --git a/lib/spack/spack/test/installer.py b/lib/spack/spack/test/installer.py
index 37ce6f5bdb..4f7f24dc24 100644
--- a/lib/spack/spack/test/installer.py
+++ b/lib/spack/spack/test/installer.py
@@ -4,6 +4,7 @@
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import os
+import shutil
import py
import pytest
@@ -1177,3 +1178,97 @@ def test_install_skip_patch(install_mockery, mock_fetch):
spec, install_args = const_arg[0]
assert inst.package_id(spec.package) in installer.installed
+
+
+def test_overwrite_install_backup_success(temporary_store, config, mock_packages,
+ tmpdir):
+ """
+ When doing an overwrite install that fails, Spack should restore the backup
+ of the original prefix, and leave the original spec marked installed.
+ """
+ # Where to store the backups
+ backup = str(tmpdir.mkdir("backup"))
+
+ # Get a build task. TODO: refactor this to avoid calling internal methods
+ const_arg = installer_args(["b"])
+ installer = create_installer(const_arg)
+ installer._init_queue()
+ task = installer._pop_task()
+
+ # Make sure the install prefix exists with some trivial file
+ installed_file = os.path.join(task.pkg.prefix, 'some_file')
+ fs.touchp(installed_file)
+
+ class InstallerThatWipesThePrefixDir:
+ def _install_task(self, task):
+ shutil.rmtree(task.pkg.prefix, ignore_errors=True)
+ fs.mkdirp(task.pkg.prefix)
+ raise Exception("Some fatal install error")
+
+ class FakeDatabase:
+ called = False
+
+ def remove(self, spec):
+ self.called = True
+
+ fake_installer = InstallerThatWipesThePrefixDir()
+ fake_db = FakeDatabase()
+ overwrite_install = inst.OverwriteInstall(
+ fake_installer, fake_db, task, tmp_root=backup)
+
+ # Installation should throw the installation exception, not the backup
+ # failure.
+ with pytest.raises(Exception, match='Some fatal install error'):
+ overwrite_install.install()
+
+ # Make sure the package is not marked uninstalled and the original dir
+ # is back.
+ assert not fake_db.called
+ assert os.path.exists(installed_file)
+
+
+def test_overwrite_install_backup_failure(temporary_store, config, mock_packages,
+ tmpdir):
+ """
+ When doing an overwrite install that fails, Spack should try to recover the
+ original prefix. If that fails, the spec is lost, and it should be removed
+ from the database.
+ """
+ # Where to store the backups
+ backup = str(tmpdir.mkdir("backup"))
+
+ class InstallerThatAccidentallyDeletesTheBackupDir:
+ def _install_task(self, task):
+ # Remove the backup directory so that restoring goes terribly wrong
+ shutil.rmtree(backup)
+ raise Exception("Some fatal install error")
+
+ class FakeDatabase:
+ called = False
+
+ def remove(self, spec):
+ self.called = True
+
+ # Get a build task. TODO: refactor this to avoid calling internal methods
+ const_arg = installer_args(["b"])
+ installer = create_installer(const_arg)
+ installer._init_queue()
+ task = installer._pop_task()
+
+ # Make sure the install prefix exists
+ installed_file = os.path.join(task.pkg.prefix, 'some_file')
+ fs.touchp(installed_file)
+
+ fake_installer = InstallerThatAccidentallyDeletesTheBackupDir()
+ fake_db = FakeDatabase()
+ overwrite_install = inst.OverwriteInstall(
+ fake_installer, fake_db, task, tmp_root=backup)
+
+ # Installation should throw the installation exception, not the backup
+ # failure.
+ with pytest.raises(Exception, match='Some fatal install error'):
+ overwrite_install.install()
+
+ # Make sure that `remove` was called on the database after an unsuccessful
+ # attempt to restore the backup.
+ assert fake_db.called