6 files changed, 171 insertions, 10 deletions
diff --git a/lib/spack/spack/cmd/buildcache.py b/lib/spack/spack/cmd/buildcache.py
index 543570cf28..dae2acdeb9 100644
--- a/lib/spack/spack/cmd/buildcache.py
+++ b/lib/spack/spack/cmd/buildcache.py
@@ -13,7 +13,6 @@ import os
 import shutil
 import sys
 import tempfile
-import urllib.request
 from typing import Dict, List, Optional, Tuple, Union
 
 import llnl.util.tty as tty
@@ -54,6 +53,7 @@ from spack.oci.image import (
 from spack.oci.oci import (
     copy_missing_layers_with_retry,
     get_manifest_and_config_with_retry,
+    list_tags,
     upload_blob_with_retry,
     upload_manifest_with_retry,
 )
@@ -856,10 +856,7 @@ def _config_from_tag(image_ref: ImageReference, tag: str) -> Optional[dict]:
 
 
 def _update_index_oci(image_ref: ImageReference, tmpdir: str, pool: MaybePool) -> None:
-    request = urllib.request.Request(url=image_ref.tags_url())
-    response = spack.oci.opener.urlopen(request)
-    spack.oci.opener.ensure_status(request, response, 200)
-    tags = json.load(response)["tags"]
+    tags = list_tags(image_ref)
 
     # Fetch all image config files in parallel
     spec_dicts = pool.starmap(
diff --git a/lib/spack/spack/oci/oci.py b/lib/spack/spack/oci/oci.py
index fefc66674e..09e79df347 100644
--- a/lib/spack/spack/oci/oci.py
+++ b/lib/spack/spack/oci/oci.py
@@ -11,7 +11,7 @@ import urllib.error
 import urllib.parse
 import urllib.request
 from http.client import HTTPResponse
-from typing import NamedTuple, Tuple
+from typing import List, NamedTuple, Tuple
 from urllib.request import Request
 
 import llnl.util.tty as tty
@@ -27,6 +27,7 @@ import spack.spec
 import spack.stage
 import spack.traverse
 import spack.util.crypto
+import spack.util.url
 
 from .image import Digest, ImageReference
 
@@ -69,6 +70,42 @@ def with_query_param(url: str, param: str, value: str) -> str:
     )
 
 
+def list_tags(ref: ImageReference, _urlopen: spack.oci.opener.MaybeOpen = None) -> List[str]:
+    """Retrieves the list of tags associated with an image, handling pagination."""
+    _urlopen = _urlopen or spack.oci.opener.urlopen
+    tags = set()
+    fetch_url = ref.tags_url()
+
+    while True:
+        # Fetch tags
+        request = Request(url=fetch_url)
+        response = _urlopen(request)
+        spack.oci.opener.ensure_status(request, response, 200)
+        tags.update(json.load(response)["tags"])
+
+        # Check for pagination
+        link_header = response.headers["Link"]
+
+        if link_header is None:
+            break
+
+        tty.debug(f"OCI tag pagination: {link_header}")
+
+        rel_next_value = spack.util.url.parse_link_rel_next(link_header)
+
+        if rel_next_value is None:
+            break
+
+        rel_next = urllib.parse.urlparse(rel_next_value)
+
+        if rel_next.scheme not in ("https", ""):
+            break
+
+        fetch_url = ref.endpoint(rel_next_value)
+
+    return sorted(tags)
+
+
 def upload_blob(
     ref: ImageReference,
     file: str,
diff --git a/lib/spack/spack/test/oci/mock_registry.py b/lib/spack/spack/test/oci/mock_registry.py
index cc39904f3c..288598089d 100644
--- a/lib/spack/spack/test/oci/mock_registry.py
+++ b/lib/spack/spack/test/oci/mock_registry.py
@@ -151,7 +151,9 @@ class InMemoryOCIRegistry(DummyServer):
     A third option is to use the chunked upload, but this is not implemented here, because
     it's typically a major performance hit in upload speed, so we're not using it in Spack."""
 
-    def __init__(self, domain: str, allow_single_post: bool = True) -> None:
+    def __init__(
+        self, domain: str, allow_single_post: bool = True, tags_per_page: int = 100
+    ) -> None:
         super().__init__(domain)
         self.router.register("GET", r"/v2/", self.index)
         self.router.register("HEAD", r"/v2/(?P<name>.+)/blobs/(?P<digest>.+)", self.head_blob)
@@ -165,6 +167,9 @@ class InMemoryOCIRegistry(DummyServer):
         # If True, allow single POST upload, not all registries support this
         self.allow_single_post = allow_single_post
 
+        # How many tags are returned in a single request
+        self.tags_per_page = tags_per_page
+
         # Used for POST + PUT upload. This is a map from session ID to image name
         self.sessions: Dict[str, str] = {}
 
@@ -280,10 +285,34 @@ class InMemoryOCIRegistry(DummyServer):
         return MockHTTPResponse(201, "Created", headers={"Location": f"/v2/{name}/blobs/{digest}"})
 
     def list_tags(self, req: Request, name: str):
+        # Paginate using Link headers, this was added to the spec in the following commit:
+        # https://github.com/opencontainers/distribution-spec/commit/2ed79d930ecec11dd755dc8190409a3b10f01ca9
+
         # List all tags, exclude digests.
-        tags = [_tag for _name, _tag in self.manifests.keys() if _name == name and ":" not in _tag]
-        tags.sort()
-        return MockHTTPResponse.with_json(200, "OK", body={"tags": tags})
+        all_tags = sorted(
+            _tag for _name, _tag in self.manifests.keys() if _name == name and ":" not in _tag
+        )
+
+        query = urllib.parse.parse_qs(urllib.parse.urlparse(req.full_url).query)
+
+        n = int(query["n"][0]) if "n" in query else self.tags_per_page
+
+        if "last" in query:
+            try:
+                offset = all_tags.index(query["last"][0]) + 1
+            except ValueError:
+                return MockHTTPResponse(404, "Not found")
+        else:
+            offset = 0
+
+        tags = all_tags[offset : offset + n]
+
+        if offset + n < len(all_tags):
+            headers = {"Link": f'</v2/{name}/tags/list?last={tags[-1]}&n={n}>; rel="next"'}
+        else:
+            headers = None
+
+        return MockHTTPResponse.with_json(200, "OK", headers=headers, body={"tags": tags})
 
 
 class DummyServerUrllibHandler(urllib.request.BaseHandler):
diff --git a/lib/spack/spack/test/oci/urlopen.py b/lib/spack/spack/test/oci/urlopen.py
index 78d713f7e8..efc3f3c2b0 100644
--- a/lib/spack/spack/test/oci/urlopen.py
+++ b/lib/spack/spack/test/oci/urlopen.py
@@ -6,6 +6,7 @@
 
 import hashlib
 import json
+import random
 import urllib.error
 import urllib.parse
 import urllib.request
@@ -19,6 +20,7 @@ from spack.oci.oci import (
     copy_missing_layers,
     get_manifest_and_config,
     image_from_mirror,
+    list_tags,
     upload_blob,
     upload_manifest,
 )
@@ -670,3 +672,31 @@ def test_retry(url, max_retries, expect_failure, expect_requests):
 
     assert len(server.requests) == expect_requests
     assert sleep_time == [2**i for i in range(expect_requests - 1)]
+
+
+def test_list_tags():
+    # Follows a relatively new rewording of the OCI distribution spec, which is not yet tagged.
+    # https://github.com/opencontainers/distribution-spec/commit/2ed79d930ecec11dd755dc8190409a3b10f01ca9
+    N = 20
+    urlopen = create_opener(InMemoryOCIRegistry("example.com", tags_per_page=5)).open
+    image = ImageReference.from_string("example.com/image")
+    to_tag = lambda i: f"tag-{i:02}"
+
+    # Create N tags in arbitrary order
+    _tags_to_create = [to_tag(i) for i in range(N)]
+    random.shuffle(_tags_to_create)
+    for tag in _tags_to_create:
+        upload_manifest(image.with_tag(tag), default_manifest(), tag=True, _urlopen=urlopen)
+
+    # list_tags should return all tags from all pages in order
+    tags = list_tags(image, urlopen)
+    assert len(tags) == N
+    assert [to_tag(i) for i in range(N)] == tags
+
+    # Test a single request, which should give the first 5 tags
+    assert json.loads(urlopen(image.tags_url()).read())["tags"] == [to_tag(i) for i in range(5)]
+
+    # Test response at an offset, which should exclude the `last` tag.
+    assert json.loads(urlopen(image.tags_url() + f"?last={to_tag(N - 3)}").read())["tags"] == [
+        to_tag(i) for i in range(N - 2, N)
+    ]
diff --git a/lib/spack/spack/test/util/util_url.py b/lib/spack/spack/test/util/util_url.py
index e2b403f82e..befaaef1cd 100644
--- a/lib/spack/spack/test/util/util_url.py
+++ b/lib/spack/spack/test/util/util_url.py
@@ -207,3 +207,29 @@ def test_default_download_name_dot_dot():
     assert url_util.default_download_filename("https://example.com/.") == "_"
     assert url_util.default_download_filename("https://example.com/..") == "_."
     assert url_util.default_download_filename("https://example.com/.abcdef") == "_abcdef"
+
+
+def test_parse_link_rel_next():
+    parse = url_util.parse_link_rel_next
+    assert parse(r'</abc>; rel="next"') == "/abc"
+    assert parse(r'</abc>; x=y; rel="next", </def>; x=y; rel="prev"') == "/abc"
+    assert parse(r'</abc>; rel="prev"; x=y, </def>; x=y; rel="next"') == "/def"
+
+    # example from RFC5988
+    assert (
+        parse(
+            r"""</TheBook/chapter2>; title*=UTF-8'de'letztes%20Kapitel; rel="previous","""
+            r"""</TheBook/chapter4>; title*=UTF-8'de'n%c3%a4chstes%20Kapitel; rel="next" """
+        )
+        == "/TheBook/chapter4"
+    )
+
+    assert (
+        parse(r"""<https://example.com/example>; key=";a=b, </c/d>; e=f"; rel="next" """)
+        == "https://example.com/example"
+    )
+
+    assert parse("https://example.com/example") is None
+    assert parse("<https://example.com/example; broken=broken") is None
+    assert parse("https://example.com/example; rel=prev") is None
+    assert parse("https://example.com/example; a=b; c=d; g=h") is None
diff --git a/lib/spack/spack/util/url.py b/lib/spack/spack/util/url.py
index d3b5e59fb4..64671f1d95 100644
--- a/lib/spack/spack/util/url.py
+++ b/lib/spack/spack/util/url.py
@@ -10,9 +10,11 @@ Utility functions for parsing, formatting, and manipulating URLs.
 import itertools
 import os
 import posixpath
+import re
 import sys
 import urllib.parse
 import urllib.request
+from typing import Optional
 
 from llnl.path import convert_to_posix_path
 
@@ -254,3 +256,43 @@ def default_download_filename(url: str) -> str:
         valid_name = "_" + valid_name[1:]
 
     return valid_name
+
+
+def parse_link_rel_next(link_value: str) -> Optional[str]:
+    """Return the next link from a Link header value, if any."""
+
+    # Relaxed version of RFC5988
+    uri = re.compile(r"\s*<([^>]+)>\s*")
+    param_key = r"[^;=\s]+"
+    quoted_string = r"\"([^\"]+)\""
+    unquoted_param_value = r"([^;,\s]+)"
+    param = re.compile(rf";\s*({param_key})\s*=\s*(?:{quoted_string}|{unquoted_param_value})\s*")
+
+    data = link_value
+
+    # Parse a list of <url>; key=value; key=value, <url>; key=value; key=value, ... links.
+    while True:
+        uri_match = re.match(uri, data)
+        if not uri_match:
+            break
+        uri_reference = uri_match.group(1)
+        data = data[uri_match.end() :]
+
+        # Parse parameter list
+        while True:
+            param_match = re.match(param, data)
+            if not param_match:
+                break
+            key, quoted_value, unquoted_value = param_match.groups()
+            value = quoted_value or unquoted_value
+            data = data[param_match.end() :]
+
+            if key == "rel" and value == "next":
+                return uri_reference
+
+        if not data.startswith(","):
+            break
+
+        data = data[1:]
+
+    return None