From e3b220f699764f5b6e33e529feef8b2bec713288 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Tue, 22 Jun 2021 12:51:31 -0400 Subject: Implement CVS fetcher (#23212) Spack packages can now fetch versions from CVS repositories. Note this fetch mechanism is unsafe unless using :extssh:. Most public CVS repositories use an insecure protocol implemented as part of CVS. --- .github/workflows/unit_tests.yaml | 23 ++-- lib/spack/docs/packaging_guide.rst | 54 +++++++- lib/spack/spack/fetch_strategy.py | 111 +++++++++++++++ lib/spack/spack/test/conftest.py | 149 +++++++++++++++++++++ lib/spack/spack/test/cvs_fetch.py | 111 +++++++++++++++ .../builtin.mock/packages/cvs-test/package.py | 13 ++ 6 files changed, 449 insertions(+), 12 deletions(-) create mode 100644 lib/spack/spack/test/cvs_fetch.py create mode 100644 var/spack/repos/builtin.mock/packages/cvs-test/package.py diff --git a/.github/workflows/unit_tests.yaml b/.github/workflows/unit_tests.yaml index 1cc60f593a..6d5901edd1 100644 --- a/.github/workflows/unit_tests.yaml +++ b/.github/workflows/unit_tests.yaml @@ -39,7 +39,7 @@ jobs: python-version: 3.9 - name: Install Python packages run: | - pip install --upgrade pip six setuptools flake8 mypy>=0.800 black types-six + pip install --upgrade pip six setuptools flake8 mypy>=0.800 black types-six types-python-dateutil - name: Setup git configuration run: | # Need this for the git tests to succeed. 
@@ -62,7 +62,7 @@ jobs: sudo apt-get install -y coreutils ninja-build graphviz - name: Install Python packages run: | - pip install --upgrade pip six setuptools + pip install --upgrade pip six setuptools python-dateutil pip install --upgrade -r lib/spack/docs/requirements.txt - name: Build documentation run: | @@ -129,14 +129,15 @@ jobs: run: | sudo apt-get -y update # Needed for unit tests - sudo apt-get install -y coreutils gfortran graphviz gnupg2 mercurial - sudo apt-get install -y ninja-build patchelf + sudo apt-get -y install \ + coreutils cvs gfortran graphviz gnupg2 mercurial ninja-build \ + patchelf # Needed for kcov sudo apt-get -y install cmake binutils-dev libcurl4-openssl-dev sudo apt-get -y install zlib1g-dev libdw-dev libiberty-dev - name: Install Python packages run: | - pip install --upgrade pip six setuptools codecov coverage + pip install --upgrade pip python-dateutil six setuptools codecov coverage - name: Setup git configuration run: | # Need this for the git tests to succeed. @@ -200,7 +201,7 @@ jobs: sudo apt-get -y install zlib1g-dev libdw-dev libiberty-dev - name: Install Python packages run: | - pip install --upgrade pip six setuptools codecov coverage + pip install --upgrade pip six setuptools codecov coverage python-dateutil - name: Setup git configuration run: | # Need this for the git tests to succeed. 
@@ -286,7 +287,7 @@ jobs: shell: runuser -u spack-test -- bash {0} run: | source share/spack/setup-env.sh - spack unit-test -k 'not svn and not hg' -x --verbose + spack unit-test -k 'not cvs and not svn and not hg' -x --verbose # Test for the clingo based solver (using clingo-cffi) clingo-cffi: needs: [ validate, style, documentation, changes ] @@ -302,8 +303,9 @@ jobs: run: | sudo apt-get -y update # Needed for unit tests - sudo apt-get install -y coreutils gfortran graphviz gnupg2 mercurial - sudo apt-get install -y ninja-build patchelf + sudo apt-get -y install \ + coreutils cvs gfortran graphviz gnupg2 mercurial ninja-build \ + patchelf # Needed for kcov sudo apt-get -y install cmake binutils-dev libcurl4-openssl-dev sudo apt-get -y install zlib1g-dev libdw-dev libiberty-dev @@ -320,7 +322,7 @@ jobs: make -C ${KCOV_ROOT}/build && sudo make -C ${KCOV_ROOT}/build install - name: Install Python packages run: | - pip install --upgrade pip six setuptools codecov coverage clingo + pip install --upgrade pip six setuptools codecov coverage clingo python-dateutil - name: Setup git configuration run: | # Need this for the git tests to succeed. @@ -365,6 +367,7 @@ jobs: pip install --upgrade pip six setuptools pip install --upgrade codecov coverage pip install --upgrade flake8 pep8-naming mypy + pip install --upgrade python-dateutil - name: Setup Homebrew packages run: | brew install dash fish gcc gnupg2 kcov diff --git a/lib/spack/docs/packaging_guide.rst b/lib/spack/docs/packaging_guide.rst index b9cd85ed82..80ddf98c38 100644 --- a/lib/spack/docs/packaging_guide.rst +++ b/lib/spack/docs/packaging_guide.rst @@ -920,12 +920,13 @@ For some packages, source code is provided in a Version Control System (VCS) repository rather than in a tarball. Spack can fetch packages from VCS repositories. Currently, Spack supports fetching with `Git <git-fetch_>`_, `Mercurial (hg) <hg-fetch_>`_, `Subversion (svn) -<svn-fetch_>`_, and `Go <go-fetch_>`_. In all cases, the destination +<svn-fetch_>`_, `CVS (cvs) <cvs-fetch_>`_, and `Go <go-fetch_>`_.
+In all cases, the destination is the standard stage source path. To fetch a package from a source repository, Spack needs to know which VCS to use and where to download from. Much like with ``url``, package -authors can specify a class-level ``git``, ``hg``, ``svn``, or ``go`` +authors can specify a class-level ``git``, ``hg``, ``svn``, ``cvs``, or ``go`` attribute containing the correct download location. Many packages developed with Git have both a Git repository as well as @@ -1173,6 +1174,55 @@ you can check out a branch or tag by changing the URL. If you want to package multiple branches, simply add a ``svn`` argument to each version directive. +.. _cvs-fetch: + +^^^ +CVS +^^^ + +CVS (Concurrent Versions System) is an old centralized version control +system. It is a predecessor of Subversion. + +To fetch with CVS, use the ``cvs``, ``branch``, and ``date`` parameters. +The destination directory will be the standard stage source path. + +Fetching the head + Simply add a ``cvs`` parameter to the package: + + .. code-block:: python + + class Example(Package): + + cvs = ":pserver:outreach.scidac.gov/cvsroot%module=modulename" + + version('1.1.2.4') + + CVS repository locations are described using an older syntax that + is different from today's ubiquitous URL syntax. ``:pserver:`` + denotes the transport method. CVS servers can host multiple + repositories (called "modules") at the same location, and one needs + to specify both the server location and the module name to access. + Spack combines both into one string using the ``%module=modulename`` + suffix shown above. + + This download method is untrusted. + +Fetching a date + Versions in CVS are commonly specified by date. To fetch a + particular branch or date, add a ``branch`` and/or ``date`` argument + to the version directive: + + ..
code-block:: python + + version('2021.4.22', branch='branchname', date='2021-04-22') + + Unfortunately, CVS does not identify repository-wide commits via a + revision or hash like Subversion, Git, or Mercurial do. This makes + it impossible to specify an exact commit to check out. + +CVS has more features, but since CVS is rarely used these days, Spack +does not support all of them. + .. _go-fetch: ^^ diff --git a/lib/spack/spack/fetch_strategy.py b/lib/spack/spack/fetch_strategy.py index b4ca8b3e70..e98c9d8724 100644 --- a/lib/spack/spack/fetch_strategy.py +++ b/lib/spack/spack/fetch_strategy.py @@ -960,6 +960,117 @@ class GitFetchStrategy(VCSFetchStrategy): return '[git] {0}'.format(self._repo_info()) +@fetcher +class CvsFetchStrategy(VCSFetchStrategy): + """Fetch strategy that gets source code from a CVS repository. + Use like this in a package: + + version('name', + cvs=':pserver:anonymous@www.example.com:/cvsroot%module=modulename') + + Optionally, you can provide a branch and/or a date for the URL: + + version('name', + cvs=':pserver:anonymous@www.example.com:/cvsroot%module=modulename', + branch='branchname', date='date') + + Repositories are checked out into the standard stage source path directory. 
+ """ + url_attr = 'cvs' + optional_attrs = ['branch', 'date'] + + def __init__(self, **kwargs): + # Discards the keywords in kwargs that may conflict with the next call + # to __init__ + forwarded_args = copy.copy(kwargs) + forwarded_args.pop('name', None) + super(CvsFetchStrategy, self).__init__(**forwarded_args) + + self._cvs = None + if self.branch is not None: + self.branch = str(self.branch) + if self.date is not None: + self.date = str(self.date) + + @property + def cvs(self): + if not self._cvs: + self._cvs = which('cvs', required=True) + return self._cvs + + @property + def cachable(self): + return self.cache_enabled and (bool(self.branch) or bool(self.date)) + + def source_id(self): + if not (self.branch or self.date): + # We need a branch or a date to make a checkout reproducible + return None + id = 'id' + if self.branch: + id += '-branch=' + self.branch + if self.date: + id += '-date=' + self.date + return id + + def mirror_id(self): + if not (self.branch or self.date): + # We need a branch or a date to make a checkout reproducible + return None + repo_path = url_util.parse(self.url).path + result = os.path.sep.join(['cvs', repo_path]) + if self.branch: + result += '%branch=' + self.branch + if self.date: + result += '%date=' + self.date + return result + + @_needs_stage + def fetch(self): + if self.stage.expanded: + tty.debug('Already fetched {0}'.format(self.stage.source_path)) + return + + tty.debug('Checking out CVS repository: {0}'.format(self.url)) + + with temp_cwd(): + url, module = self.url.split('%module=') + # Check out files + args = ['-z9', '-d', url, 'checkout'] + if self.branch is not None: + args.extend(['-r', self.branch]) + if self.date is not None: + args.extend(['-D', self.date]) + args.append(module) + self.cvs(*args) + # Rename repo + repo_name = get_single_file('.') + self.stage.srcdir = repo_name + shutil.move(repo_name, self.stage.source_path) + + def _remove_untracked_files(self): + """Removes untracked files in a CVS 
repository.""" + with working_dir(self.stage.source_path): + status = self.cvs('-qn', 'update', output=str) + for line in status.split('\n'): + if re.match(r'^[?]', line): + path = line[2:].strip() + if os.path.isfile(path): + os.unlink(path) + + def archive(self, destination): + super(CvsFetchStrategy, self).archive(destination, exclude='CVS') + + @_needs_stage + def reset(self): + self._remove_untracked_files() + with working_dir(self.stage.source_path): + self.cvs('update', '-C', '.') + + def __str__(self): + return "[cvs] %s" % self.url + + @fetcher class SvnFetchStrategy(VCSFetchStrategy): diff --git a/lib/spack/spack/test/conftest.py b/lib/spack/spack/test/conftest.py index 6e003dd870..80d6305711 100644 --- a/lib/spack/spack/test/conftest.py +++ b/lib/spack/spack/test/conftest.py @@ -12,9 +12,21 @@ import os import os.path import re import shutil +import sys import tempfile import xml.etree.ElementTree +if sys.version_info >= (2, 7): + # CVS outputs dates in different formats on different systems. We are using + # the dateutil package to parse these dates. This package does not exist + # for Python <2.7. That means that we cannot test checkouts "by date" for + # CVS repositories. (We can still use CVS repos with all features, only + # our tests break.)
+ from dateutil.parser import parse as parse_date +else: + def parse_date(string): + pytest.skip("dateutil package not available for Python 2.6") + import py import pytest @@ -908,6 +920,143 @@ def mock_archive(request, tmpdir_factory): expanded_archive_basedir=spack.stage._source_path_subdir) +@pytest.fixture(scope='session') +def mock_cvs_repository(tmpdir_factory): + """Creates a very simple CVS repository with two commits and a branch.""" + cvs = spack.util.executable.which('cvs', required=True) + + tmpdir = tmpdir_factory.mktemp('mock-cvs-repo-dir') + tmpdir.ensure(spack.stage._source_path_subdir, dir=True) + repodir = tmpdir.join(spack.stage._source_path_subdir) + cvsroot = str(repodir) + + # The CVS repository and source tree need to live in different directories + sourcedirparent = tmpdir_factory.mktemp('mock-cvs-source-dir') + module = spack.stage._source_path_subdir + url = cvsroot + "%module=" + module + sourcedirparent.ensure(module, dir=True) + sourcedir = sourcedirparent.join(module) + + def format_date(date): + if date is None: + return None + return date.strftime('%Y-%m-%d %H:%M:%S') + + def get_cvs_timestamp(output): + """Find the most recent CVS time stamp in a `cvs log` output""" + latest_timestamp = None + for line in output.splitlines(): + m = re.search(r'date:\s+([^;]*);', line) + if m: + timestamp = parse_date(m.group(1)) + if latest_timestamp is None: + latest_timestamp = timestamp + else: + latest_timestamp = max(latest_timestamp, timestamp) + return latest_timestamp + + # We use this to record the time stamps for when we create CVS revisions, + # so that we can later check that we retrieve the proper commits when + # specifying a date. (CVS guarantees checking out the latest revision + # before or on the specified date). As we create each revision, we + # separately record the time by querying CVS.
+ revision_date = {} + + # Initialize the repository + with sourcedir.as_cwd(): + cvs('-d', cvsroot, 'init') + cvs('-d', cvsroot, 'import', '-m', 'initial mock repo commit', + module, 'mockvendor', 'mockrelease') + with sourcedirparent.as_cwd(): + cvs('-d', cvsroot, 'checkout', module) + + # Commit file r0 + r0_file = 'r0_file' + sourcedir.ensure(r0_file) + cvs('-d', cvsroot, 'add', r0_file) + cvs('-d', cvsroot, 'commit', '-m', 'revision 0', r0_file) + output = cvs('log', '-N', r0_file, output=str) + revision_date['1.1'] = format_date(get_cvs_timestamp(output)) + + # Commit file r1 + r1_file = 'r1_file' + sourcedir.ensure(r1_file) + cvs('-d', cvsroot, 'add', r1_file) + cvs('-d', cvsroot, 'commit', '-m' 'revision 1', r1_file) + output = cvs('log', '-N', r0_file, output=str) + revision_date['1.2'] = format_date(get_cvs_timestamp(output)) + + # Create branch 'mock-branch' + cvs('-d', cvsroot, 'tag', 'mock-branch-root') + cvs('-d', cvsroot, 'tag', '-b', 'mock-branch') + + # CVS does not have the notion of a unique branch; branches and revisions + # are managed separately for every file + def get_branch(): + """Return the branch name if all files are on the same branch, else + return None. 
Also return None if all files are on the trunk.""" + lines = cvs('-d', cvsroot, 'status', '-v', output=str).splitlines() + branch = None + for line in lines: + m = re.search(r'(\S+)\s+[(]branch:', line) + if m: + tag = m.group(1) + if branch is None: + # First branch name found + branch = tag + elif tag == branch: + # Later branch name found; all branch names found so far + # agree + pass + else: + # Later branch name found; branch names differ + branch = None + break + return branch + + # CVS does not have the notion of a unique revision; usually, one uses + # commit dates instead + def get_date(): + """Return latest date of the revisions of all files""" + output = cvs('log', '-N', r0_file, output=str) + timestamp = get_cvs_timestamp(output) + if timestamp is None: + return None + return format_date(timestamp) + + checks = { + 'default': Bunch( + file=r1_file, + branch=None, + date=None, + args={'cvs': url}, + ), + 'branch': Bunch( + file=r1_file, + branch='mock-branch', + date=None, + args={'cvs': url, 'branch': 'mock-branch'}, + ), + 'date': Bunch( + file=r0_file, + branch=None, + date=revision_date['1.1'], + args={'cvs': url, + 'date': revision_date['1.1']}, + ), + } + + test = Bunch( + checks=checks, + url=url, + get_branch=get_branch, + get_date=get_date, + path=str(repodir), + ) + + yield test + + @pytest.fixture(scope='session') def mock_git_repository(tmpdir_factory): """Creates a simple git repository with two branches, diff --git a/lib/spack/spack/test/cvs_fetch.py b/lib/spack/spack/test/cvs_fetch.py new file mode 100644 index 0000000000..8bf4c20acb --- /dev/null +++ b/lib/spack/spack/test/cvs_fetch.py @@ -0,0 +1,111 @@ +# Copyright 2013-2021 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. 
+# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +import os + +import pytest + +from llnl.util.filesystem import touch, working_dir, mkdirp + +import spack.repo +import spack.config +from spack.spec import Spec +from spack.stage import Stage +from spack.version import ver +from spack.fetch_strategy import CvsFetchStrategy +from spack.util.executable import which + + +pytestmark = pytest.mark.skipif( + not which('cvs'), + reason='requires CVS to be installed') + + +@pytest.mark.parametrize("type_of_test", ['default', 'branch', 'date']) +def test_fetch( + type_of_test, + mock_cvs_repository, + config, + mutable_mock_repo +): + """Tries to: + + 1. Fetch the repo using a fetch strategy constructed with + supplied args (they depend on type_of_test). + 2. Check whether the checkout is on the correct branch or date + 3. Check if the test_file is in the checked out repository. + 4. Add and remove some files, then reset the repo, and + ensure it's all there again. + + CVS does not have the notion of a unique branch; branches and revisions + are managed separately for every file. 
+ """ + # Retrieve the right test parameters + test = mock_cvs_repository.checks[type_of_test] + get_branch = mock_cvs_repository.get_branch + get_date = mock_cvs_repository.get_date + + # Construct the package under test + spec = Spec('cvs-test') + spec.concretize() + pkg = spack.repo.get(spec) + pkg.versions[ver('cvs')] = test.args + + # Enter the stage directory and check some properties + with pkg.stage: + pkg.do_stage() + + with working_dir(pkg.stage.source_path): + # Check branch + if test.branch is not None: + assert get_branch() == test.branch + + # Check date + if test.date is not None: + assert get_date() <= test.date + + file_path = os.path.join(pkg.stage.source_path, test.file) + assert os.path.isdir(pkg.stage.source_path) + assert os.path.isfile(file_path) + + os.unlink(file_path) + assert not os.path.isfile(file_path) + + untracked_file = 'foobarbaz' + touch(untracked_file) + assert os.path.isfile(untracked_file) + pkg.do_restage() + assert not os.path.isfile(untracked_file) + + assert os.path.isdir(pkg.stage.source_path) + assert os.path.isfile(file_path) + + +def test_cvs_extra_fetch(tmpdir): + """Ensure a fetch after downloading is effectively a no-op.""" + testpath = str(tmpdir) + + fetcher = CvsFetchStrategy( + cvs=':pserver:not-a-real-cvs-repo%module=not-a-real-module') + assert fetcher is not None + + with Stage(fetcher, path=testpath) as stage: + assert stage is not None + + source_path = stage.source_path + mkdirp(source_path) + + # TODO: This doesn't look as if it was testing what this function's + # comment says it is testing. However, the other `test_*_extra_fetch` + # functions (for svn, git, hg) use equivalent code. + # + # We're calling `fetcher.fetch` twice as this might be what we want to + # do, and it can't hurt. See + # for a discussion on this. 
+ + # Fetch once + fetcher.fetch() + # Fetch a second time + fetcher.fetch() diff --git a/var/spack/repos/builtin.mock/packages/cvs-test/package.py b/var/spack/repos/builtin.mock/packages/cvs-test/package.py new file mode 100644 index 0000000000..23d714595b --- /dev/null +++ b/var/spack/repos/builtin.mock/packages/cvs-test/package.py @@ -0,0 +1,13 @@ +# Copyright 2013-2021 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +from spack import * + + +class CvsTest(Package): + """Mock package that uses cvs for fetching.""" + homepage = "http://www.cvs-fetch-example.com" + + version('cvs', cvs='to-be-filled-in-by-test') -- cgit v1.2.3-60-g2f50