summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorTom Scogland <scogland1@llnl.gov>2021-06-04 00:23:37 -0700
committerGitHub <noreply@github.com>2021-06-04 09:23:37 +0200
commitb63a8b3e273bf6530658a81b2e43da36c1912c40 (patch)
treed577c8612f3d1dac30cfd7cf7e52e328279385b4 /lib
parentea390198f4546c064dc7a470c4cfff4b28bfae95 (diff)
downloadspack-b63a8b3e273bf6530658a81b2e43da36c1912c40.tar.gz
spack-b63a8b3e273bf6530658a81b2e43da36c1912c40.tar.bz2
spack-b63a8b3e273bf6530658a81b2e43da36c1912c40.tar.xz
spack-b63a8b3e273bf6530658a81b2e43da36c1912c40.zip
Speed-up version parsing and comparison (#22973)
The VALID_VERSION regex didn't check that the version string was completely valid, only that a prefix of it was. This version ensures the entire string represents a valid version. This makes a few related changes. 1. Make the SEGMENT_REGEX identify *which* arm it matches by what groups are populated, including whether it's a string or int component or a separator all at once. 2. Use the updated regex to parse the input once with a findall rather than twice, once with findall and once with split, since the version components and separators can be distinguished by their group status. 3. Rather than "convert to int, on exception stay string," if the int group is set then convert to int, if not then construct an instance of the VersionStrComponent class 4. VersionStrComponent now implements all of the special string comparison logic as part of its __lt__ and __eq__ methods to deal with infinity versions and also overloads comparison with integers. 5. Version now uses direct tuple comparison since it has no per-element special logic outside the VersionStrComponent class. Co-authored-by: Massimiliano Culpo <massimiliano.culpo@gmail.com>
Diffstat (limited to 'lib')
-rw-r--r--lib/spack/spack/test/versions.py11
-rw-r--r--lib/spack/spack/version.py125
2 files changed, 84 insertions, 52 deletions
diff --git a/lib/spack/spack/test/versions.py b/lib/spack/spack/test/versions.py
index a33f7e08f7..8e6d5430ee 100644
--- a/lib/spack/spack/test/versions.py
+++ b/lib/spack/spack/test/versions.py
@@ -565,3 +565,14 @@ def test_list_highest():
assert vl2.highest_numeric() is None
assert vl2.preferred() == Version('develop')
assert vl2.lowest() == Version('master')
+
+
+@pytest.mark.parametrize('version_str', [
+ "foo 1.2.0",
+ "!",
+ "1!2"
+])
+def test_invalid_versions(version_str):
+ """Ensure invalid versions are rejected with a ValueError"""
+ with pytest.raises(ValueError):
+ Version(version_str)
diff --git a/lib/spack/spack/version.py b/lib/spack/spack/version.py
index 82c0afd53e..251c86b184 100644
--- a/lib/spack/spack/version.py
+++ b/lib/spack/spack/version.py
@@ -37,21 +37,15 @@ from spack.util.spack_yaml import syaml_dict
__all__ = ['Version', 'VersionRange', 'VersionList', 'ver']
# Valid version characters
-VALID_VERSION = re.compile(r'[A-Za-z0-9_.-]')
+VALID_VERSION = re.compile(r'^[A-Za-z0-9_.-]+$')
# regex for version segments
-SEGMENT_REGEX = re.compile(r'[a-zA-Z]+|[0-9]+')
+SEGMENT_REGEX = re.compile(r'(?:(?P<num>[0-9]+)|(?P<str>[a-zA-Z]+))(?P<sep>[_.-]*)')
# Infinity-like versions. The order in the list implies the comparison rules
infinity_versions = ['develop', 'main', 'master', 'head', 'trunk']
-
-def int_if_int(string):
- """Convert a string to int if possible. Otherwise, return a string."""
- try:
- return int(string)
- except ValueError:
- return string
+iv_min_len = min(len(s) for s in infinity_versions)
def coerce_versions(a, b):
@@ -96,26 +90,86 @@ def coerced(method):
return coercing_method
+class VersionStrComponent(object):
+ # NOTE: this is intentionally not a UserString, the abc instanceof
+ # check is slow enough to eliminate all gains
+ __slots__ = ['inf_ver', 'data']
+
+ def __init__(self, string):
+ self.inf_ver = None
+ self.data = string
+ if len(string) >= iv_min_len:
+ try:
+ self.inf_ver = infinity_versions.index(string)
+ except ValueError:
+ pass
+
+ def __hash__(self):
+ return hash(self.data)
+
+ def __str__(self):
+ return self.data
+
+ def __eq__(self, other):
+ if isinstance(other, VersionStrComponent):
+ return self.data == other.data
+ return self.data == other
+
+ def __lt__(self, other):
+ if isinstance(other, VersionStrComponent):
+ if self.inf_ver is not None:
+ if other.inf_ver is not None:
+ return self.inf_ver > other.inf_ver
+ return False
+ if other.inf_ver is not None:
+ return True
+
+ return self.data < other.data
+
+ if self.inf_ver is not None:
+ return False
+
+ # Numbers are always "newer" than letters.
+ # This is for consistency with RPM. See patch
+ # #60884 (and details) from bugzilla #50977 in
+ # the RPM project at rpm.org. Or look at
+ # rpmvercmp.c if you want to see how this is
+ # implemented there.
+ if isinstance(other, int):
+ return True
+
+ if isinstance(other, str):
+ return self < VersionStrComponent(other)
+ # If we get here, it's an unsupported comparison
+
+ raise ValueError("VersionStrComponent can only be compared with itself, "
+ "int and str")
+
+ def __gt__(self, other):
+ return not self.__lt__(other)
+
+
class Version(object):
"""Class to represent versions"""
+ __slots__ = ['version', 'separators', 'string']
def __init__(self, string):
if not isinstance(string, str):
string = str(string)
- if not VALID_VERSION.match(string):
- raise ValueError("Bad characters in version string: %s" % string)
-
# preserve the original string, but trimmed.
string = string.strip()
self.string = string
- # Split version into alphabetical and numeric segments
- segments = SEGMENT_REGEX.findall(string)
- self.version = tuple(int_if_int(seg) for seg in segments)
+ if not VALID_VERSION.match(string):
+ raise ValueError("Bad characters in version string: %s" % string)
- # Store the separators from the original version string as well.
- self.separators = tuple(SEGMENT_REGEX.split(string)[1:])
+ # Split version into alphabetical and numeric segments simultaneously
+ segments = SEGMENT_REGEX.findall(string)
+ self.version = tuple(
+ int(m[0]) if m[0] else VersionStrComponent(m[1]) for m in segments
+ )
+ self.separators = tuple(m[2] for m in segments)
@property
def dotted(self):
@@ -277,40 +331,8 @@ class Version(object):
if other is None:
return False
- # Coerce if other is not a Version
- # simple equality test first.
- if self.version == other.version:
- return False
-
- # Standard comparison of two numeric versions
- for a, b in zip(self.version, other.version):
- if a == b:
- continue
- else:
- if a in infinity_versions:
- if b in infinity_versions:
- return (infinity_versions.index(a) >
- infinity_versions.index(b))
- else:
- return False
- if b in infinity_versions:
- return True
-
- # Neither a nor b is infinity
- # Numbers are always "newer" than letters.
- # This is for consistency with RPM. See patch
- # #60884 (and details) from bugzilla #50977 in
- # the RPM project at rpm.org. Or look at
- # rpmvercmp.c if you want to see how this is
- # implemented there.
- if type(a) != type(b):
- return type(b) == int
- else:
- return a < b
-
- # If the common prefix is equal, the one
- # with more segments is bigger.
- return len(self.version) < len(other.version)
+ # Use tuple comparison assisted by VersionStrComponent for performance
+ return self.version < other.version
@coerced
def __eq__(self, other):
@@ -595,7 +617,6 @@ class VersionList(object):
else:
self.versions = [vlist]
else:
- vlist = list(vlist)
for v in vlist:
self.add(ver(v))