summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/spack/spack/cmd/buildcache.py7
-rw-r--r--lib/spack/spack/oci/oci.py39
-rw-r--r--lib/spack/spack/test/oci/mock_registry.py37
-rw-r--r--lib/spack/spack/test/oci/urlopen.py30
-rw-r--r--lib/spack/spack/test/util/util_url.py26
-rw-r--r--lib/spack/spack/util/url.py42
6 files changed, 171 insertions, 10 deletions
diff --git a/lib/spack/spack/cmd/buildcache.py b/lib/spack/spack/cmd/buildcache.py
index 543570cf28..dae2acdeb9 100644
--- a/lib/spack/spack/cmd/buildcache.py
+++ b/lib/spack/spack/cmd/buildcache.py
@@ -13,7 +13,6 @@ import os
import shutil
import sys
import tempfile
-import urllib.request
from typing import Dict, List, Optional, Tuple, Union
import llnl.util.tty as tty
@@ -54,6 +53,7 @@ from spack.oci.image import (
from spack.oci.oci import (
copy_missing_layers_with_retry,
get_manifest_and_config_with_retry,
+ list_tags,
upload_blob_with_retry,
upload_manifest_with_retry,
)
@@ -856,10 +856,7 @@ def _config_from_tag(image_ref: ImageReference, tag: str) -> Optional[dict]:
def _update_index_oci(image_ref: ImageReference, tmpdir: str, pool: MaybePool) -> None:
- request = urllib.request.Request(url=image_ref.tags_url())
- response = spack.oci.opener.urlopen(request)
- spack.oci.opener.ensure_status(request, response, 200)
- tags = json.load(response)["tags"]
+ tags = list_tags(image_ref)
# Fetch all image config files in parallel
spec_dicts = pool.starmap(
diff --git a/lib/spack/spack/oci/oci.py b/lib/spack/spack/oci/oci.py
index fefc66674e..09e79df347 100644
--- a/lib/spack/spack/oci/oci.py
+++ b/lib/spack/spack/oci/oci.py
@@ -11,7 +11,7 @@ import urllib.error
import urllib.parse
import urllib.request
from http.client import HTTPResponse
-from typing import NamedTuple, Tuple
+from typing import List, NamedTuple, Tuple
from urllib.request import Request
import llnl.util.tty as tty
@@ -27,6 +27,7 @@ import spack.spec
import spack.stage
import spack.traverse
import spack.util.crypto
+import spack.util.url
from .image import Digest, ImageReference
@@ -69,6 +70,42 @@ def with_query_param(url: str, param: str, value: str) -> str:
)
+def list_tags(ref: ImageReference, _urlopen: spack.oci.opener.MaybeOpen = None) -> List[str]:
+ """Retrieves the list of tags associated with an image, handling pagination."""
+ _urlopen = _urlopen or spack.oci.opener.urlopen
+ tags = set()
+ fetch_url = ref.tags_url()
+
+ while True:
+ # Fetch tags
+ request = Request(url=fetch_url)
+ response = _urlopen(request)
+ spack.oci.opener.ensure_status(request, response, 200)
+ tags.update(json.load(response)["tags"])
+
+ # Check for pagination
+ link_header = response.headers["Link"]
+
+ if link_header is None:
+ break
+
+ tty.debug(f"OCI tag pagination: {link_header}")
+
+ rel_next_value = spack.util.url.parse_link_rel_next(link_header)
+
+ if rel_next_value is None:
+ break
+
+ rel_next = urllib.parse.urlparse(rel_next_value)
+
+ if rel_next.scheme not in ("https", ""):
+ break
+
+ fetch_url = ref.endpoint(rel_next_value)
+
+ return sorted(tags)
+
+
def upload_blob(
ref: ImageReference,
file: str,
diff --git a/lib/spack/spack/test/oci/mock_registry.py b/lib/spack/spack/test/oci/mock_registry.py
index cc39904f3c..288598089d 100644
--- a/lib/spack/spack/test/oci/mock_registry.py
+++ b/lib/spack/spack/test/oci/mock_registry.py
@@ -151,7 +151,9 @@ class InMemoryOCIRegistry(DummyServer):
A third option is to use the chunked upload, but this is not implemented here, because
it's typically a major performance hit in upload speed, so we're not using it in Spack."""
- def __init__(self, domain: str, allow_single_post: bool = True) -> None:
+ def __init__(
+ self, domain: str, allow_single_post: bool = True, tags_per_page: int = 100
+ ) -> None:
super().__init__(domain)
self.router.register("GET", r"/v2/", self.index)
self.router.register("HEAD", r"/v2/(?P<name>.+)/blobs/(?P<digest>.+)", self.head_blob)
@@ -165,6 +167,9 @@ class InMemoryOCIRegistry(DummyServer):
# If True, allow single POST upload, not all registries support this
self.allow_single_post = allow_single_post
+ # How many tags are returned in a single request
+ self.tags_per_page = tags_per_page
+
# Used for POST + PUT upload. This is a map from session ID to image name
self.sessions: Dict[str, str] = {}
@@ -280,10 +285,34 @@ class InMemoryOCIRegistry(DummyServer):
return MockHTTPResponse(201, "Created", headers={"Location": f"/v2/{name}/blobs/{digest}"})
def list_tags(self, req: Request, name: str):
+ # Paginate using Link headers, this was added to the spec in the following commit:
+ # https://github.com/opencontainers/distribution-spec/commit/2ed79d930ecec11dd755dc8190409a3b10f01ca9
+
# List all tags, exclude digests.
- tags = [_tag for _name, _tag in self.manifests.keys() if _name == name and ":" not in _tag]
- tags.sort()
- return MockHTTPResponse.with_json(200, "OK", body={"tags": tags})
+ all_tags = sorted(
+ _tag for _name, _tag in self.manifests.keys() if _name == name and ":" not in _tag
+ )
+
+ query = urllib.parse.parse_qs(urllib.parse.urlparse(req.full_url).query)
+
+ n = int(query["n"][0]) if "n" in query else self.tags_per_page
+
+ if "last" in query:
+ try:
+ offset = all_tags.index(query["last"][0]) + 1
+ except ValueError:
+ return MockHTTPResponse(404, "Not found")
+ else:
+ offset = 0
+
+ tags = all_tags[offset : offset + n]
+
+ if offset + n < len(all_tags):
+ headers = {"Link": f'</v2/{name}/tags/list?last={tags[-1]}&n={n}>; rel="next"'}
+ else:
+ headers = None
+
+ return MockHTTPResponse.with_json(200, "OK", headers=headers, body={"tags": tags})
class DummyServerUrllibHandler(urllib.request.BaseHandler):
diff --git a/lib/spack/spack/test/oci/urlopen.py b/lib/spack/spack/test/oci/urlopen.py
index 78d713f7e8..efc3f3c2b0 100644
--- a/lib/spack/spack/test/oci/urlopen.py
+++ b/lib/spack/spack/test/oci/urlopen.py
@@ -6,6 +6,7 @@
import hashlib
import json
+import random
import urllib.error
import urllib.parse
import urllib.request
@@ -19,6 +20,7 @@ from spack.oci.oci import (
copy_missing_layers,
get_manifest_and_config,
image_from_mirror,
+ list_tags,
upload_blob,
upload_manifest,
)
@@ -670,3 +672,31 @@ def test_retry(url, max_retries, expect_failure, expect_requests):
assert len(server.requests) == expect_requests
assert sleep_time == [2**i for i in range(expect_requests - 1)]
+
+
+def test_list_tags():
+ # Follows a relatively new rewording of the OCI distribution spec, which is not yet tagged.
+ # https://github.com/opencontainers/distribution-spec/commit/2ed79d930ecec11dd755dc8190409a3b10f01ca9
+ N = 20
+ urlopen = create_opener(InMemoryOCIRegistry("example.com", tags_per_page=5)).open
+ image = ImageReference.from_string("example.com/image")
+ to_tag = lambda i: f"tag-{i:02}"
+
+ # Create N tags in arbitrary order
+ _tags_to_create = [to_tag(i) for i in range(N)]
+ random.shuffle(_tags_to_create)
+ for tag in _tags_to_create:
+ upload_manifest(image.with_tag(tag), default_manifest(), tag=True, _urlopen=urlopen)
+
+ # list_tags should return all tags from all pages in order
+ tags = list_tags(image, urlopen)
+ assert len(tags) == N
+ assert [to_tag(i) for i in range(N)] == tags
+
+ # Test a single request, which should give the first 5 tags
+ assert json.loads(urlopen(image.tags_url()).read())["tags"] == [to_tag(i) for i in range(5)]
+
+ # Test response at an offset, which should exclude the `last` tag.
+ assert json.loads(urlopen(image.tags_url() + f"?last={to_tag(N - 3)}").read())["tags"] == [
+ to_tag(i) for i in range(N - 2, N)
+ ]
diff --git a/lib/spack/spack/test/util/util_url.py b/lib/spack/spack/test/util/util_url.py
index e2b403f82e..befaaef1cd 100644
--- a/lib/spack/spack/test/util/util_url.py
+++ b/lib/spack/spack/test/util/util_url.py
@@ -207,3 +207,29 @@ def test_default_download_name_dot_dot():
assert url_util.default_download_filename("https://example.com/.") == "_"
assert url_util.default_download_filename("https://example.com/..") == "_."
assert url_util.default_download_filename("https://example.com/.abcdef") == "_abcdef"
+
+
+def test_parse_link_rel_next():
+ parse = url_util.parse_link_rel_next
+ assert parse(r'</abc>; rel="next"') == "/abc"
+ assert parse(r'</abc>; x=y; rel="next", </def>; x=y; rel="prev"') == "/abc"
+ assert parse(r'</abc>; rel="prev"; x=y, </def>; x=y; rel="next"') == "/def"
+
+ # example from RFC5988
+ assert (
+ parse(
+ r"""</TheBook/chapter2>; title*=UTF-8'de'letztes%20Kapitel; rel="previous","""
+ r"""</TheBook/chapter4>; title*=UTF-8'de'n%c3%a4chstes%20Kapitel; rel="next" """
+ )
+ == "/TheBook/chapter4"
+ )
+
+ assert (
+ parse(r"""<https://example.com/example>; key=";a=b, </c/d>; e=f"; rel="next" """)
+ == "https://example.com/example"
+ )
+
+ assert parse("https://example.com/example") is None
+ assert parse("<https://example.com/example; broken=broken") is None
+ assert parse("https://example.com/example; rel=prev") is None
+ assert parse("https://example.com/example; a=b; c=d; g=h") is None
diff --git a/lib/spack/spack/util/url.py b/lib/spack/spack/util/url.py
index d3b5e59fb4..64671f1d95 100644
--- a/lib/spack/spack/util/url.py
+++ b/lib/spack/spack/util/url.py
@@ -10,9 +10,11 @@ Utility functions for parsing, formatting, and manipulating URLs.
import itertools
import os
import posixpath
+import re
import sys
import urllib.parse
import urllib.request
+from typing import Optional
from llnl.path import convert_to_posix_path
@@ -254,3 +256,43 @@ def default_download_filename(url: str) -> str:
valid_name = "_" + valid_name[1:]
return valid_name
+
+
+def parse_link_rel_next(link_value: str) -> Optional[str]:
+ """Return the next link from a Link header value, if any."""
+
+ # Relaxed version of RFC5988
+ uri = re.compile(r"\s*<([^>]+)>\s*")
+ param_key = r"[^;=\s]+"
+ quoted_string = r"\"([^\"]+)\""
+ unquoted_param_value = r"([^;,\s]+)"
+ param = re.compile(rf";\s*({param_key})\s*=\s*(?:{quoted_string}|{unquoted_param_value})\s*")
+
+ data = link_value
+
+ # Parse a list of <url>; key=value; key=value, <url>; key=value; key=value, ... links.
+ while True:
+ uri_match = re.match(uri, data)
+ if not uri_match:
+ break
+ uri_reference = uri_match.group(1)
+ data = data[uri_match.end() :]
+
+ # Parse parameter list
+ while True:
+ param_match = re.match(param, data)
+ if not param_match:
+ break
+ key, quoted_value, unquoted_value = param_match.groups()
+ value = quoted_value or unquoted_value
+ data = data[param_match.end() :]
+
+ if key == "rel" and value == "next":
+ return uri_reference
+
+ if not data.startswith(","):
+ break
+
+ data = data[1:]
+
+ return None