From 53ec16c9e5d60fbc85ace55ef12a802181a9d81d Mon Sep 17 00:00:00 2001 From: Peter Josef Scheibel Date: Mon, 6 May 2019 16:35:09 -0700 Subject: make license check more-strict The license text is now expected to match almost exactly (not accounting for formatting in different file types, e.g. rst vs. bash script vs. python). --- lib/spack/spack/cmd/license.py | 130 +++++++++++++++++++++++++----------- lib/spack/spack/test/cmd/license.py | 2 +- 2 files changed, 93 insertions(+), 39 deletions(-) (limited to 'lib') diff --git a/lib/spack/spack/cmd/license.py b/lib/spack/spack/cmd/license.py index 5dd42bcbb0..7ae5f1c72b 100644 --- a/lib/spack/spack/cmd/license.py +++ b/lib/spack/spack/cmd/license.py @@ -7,6 +7,7 @@ from __future__ import print_function import os import re +from collections import defaultdict import llnl.util.tty as tty @@ -89,50 +90,103 @@ def list_files(args): print(os.path.join(spack.paths.spack_root, relpath)) +# Error codes for license verification. All values are chosen such that +# bool(value) evaluates to True +OLD_LICENSE, SPDX_MISMATCH, GENERAL_MISMATCH = range(1, 4) + + +class LicenseError(object): + def __init__(self): + self.error_counts = defaultdict(int) + + def add_error(self, error): + self.error_counts[error] += 1 + + def has_errors(self): + return sum(self.error_counts.values()) > 0 + + def error_messages(self): + total = sum(self.error_counts.values()) + missing = self.error_counts[GENERAL_MISMATCH] + spdx_mismatch = self.error_counts[SPDX_MISMATCH] + old_license = self.error_counts[OLD_LICENSE] + return ( + '%d improperly licensed files' % (total), + 'files with wrong SPDX-License-Identifier: %d' % spdx_mismatch, + 'files with old license header: %d' % old_license, + 'files not containing expected license: %d' % missing) + + +def _check_license(lines, path): + license_lines = [ + r'Copyright 2013-(?:201[789]|202\d) Lawrence Livermore National Security, LLC and other', # noqa: E501 + r'Spack Project Developers\. 
See the top-level COPYRIGHT file for details.', # noqa: E501 + r'SPDX-License-Identifier: \(Apache-2\.0 OR MIT\)' + ] + + strict_date = r'Copyright 2013-2019' + + found = [] + + for line in lines: + line = re.sub(r'^[\s#\.]*', '', line) + line = line.rstrip() + for i, license_line in enumerate(license_lines): + if re.match(license_line, line): + # The first line of the license contains the copyright date. + # We allow it to be out of date but print a warning if it is + # out of date. + if i == 0: + if not re.search(strict_date, line): + tty.debug('{0}: copyright date mismatch'.format(path)) + found.append(i) + + if len(found) == len(license_lines) and found == list(sorted(found)): + return + + def old_license(line, path): + if re.search('This program is free software', line): + print('{0}: has old LGPL license header'.format(path)) + return OLD_LICENSE + + # If the SPDX identifier is present, then there is a mismatch (since it + # did not match the above regex) + def wrong_spdx_identifier(line, path): + m = re.search(r'SPDX-License-Identifier: ([^\n]*)', line) + if m and m.group(1) != apache2_mit_spdx: + print('{0}: SPDX license identifier mismatch' + '(expecting {1}, found {2})' + .format(path, apache2_mit_spdx, m.group(1))) + return SPDX_MISMATCH + + checks = [old_license, wrong_spdx_identifier] + + for line in lines: + for check in checks: + error = check(line, path) + if error: + return error + + print('{0}: the license does not match the expected format'.format(path)) + return GENERAL_MISMATCH + + def verify(args): """verify that files in spack have the right license header""" - errors = 0 - missing = 0 - old_license = 0 + + license_errors = LicenseError() for relpath in _licensed_files(args.root): path = os.path.join(args.root, relpath) with open(path) as f: - lines = [line for line in f] - - if not any(re.match(regex, relpath) for regex in lgpl_exceptions): - if any(re.match(r'^# This program is free software', line) - for line in lines): - print('%s: has 
old LGPL license header' % path) - old_license += 1 - continue - - # how we'll find licenses in files - spdx_expr = r'SPDX-License-Identifier: ([^\n]*)' - - # check first lines for required header - first_n_lines = ''.join(lines[:license_lines]) - match = re.search(spdx_expr, first_n_lines) - - if not match: - print('%s: no license header' % path) - missing += 1 - continue - - correct = apache2_mit_spdx - actual = match.group(1) - if actual != correct: - print("%s: labeled as '%s', but should be '%s'" - % (path, actual, correct)) - errors += 1 - continue - - if any([errors, missing, old_license]): - tty.die( - '%d improperly licensed files' % (errors + missing + old_license), - 'files with no SPDX-License-Identifier: %d' % missing, - 'files with wrong SPDX-License-Identifier: %d' % errors, - 'files with old license header: %d' % old_license) + lines = [line for line in f][:license_lines] + + error = _check_license(lines, path) + if error: + license_errors.add_error(error) + + if license_errors.has_errors(): + tty.die(*license_errors.error_messages()) else: tty.msg('No license issues found.') diff --git a/lib/spack/spack/test/cmd/license.py b/lib/spack/spack/test/cmd/license.py index fe02cb18a5..630b93f68b 100644 --- a/lib/spack/spack/test/cmd/license.py +++ b/lib/spack/spack/test/cmd/license.py @@ -61,7 +61,7 @@ def test_verify(tmpdir): assert str(old_lgpl_header) in out assert str(correct_header) not in out assert '3 improperly licensed files' in out - assert re.search(r'files with no SPDX-License-Identifier:\s*1', out) + assert re.search(r'files not containing expected license:\s*1', out) assert re.search(r'files with wrong SPDX-License-Identifier:\s*1', out) assert re.search(r'files with old license header:\s*1', out) -- cgit v1.2.3-60-g2f50