Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support gar artifacts urls #907

Merged
merged 15 commits into from
Feb 13, 2025
Merged
102 changes: 80 additions & 22 deletions platform_registry_api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,14 +100,37 @@ def default(cls) -> "CatalogPage":

@dataclass(frozen=True)
class RepoURL:
_path_re: ClassVar[Pattern[str]] = re.compile(
r"/v2/(?P<repo>.+)/(?P<path_suffix>(tags|manifests|blobs)/.*)"
)

repo: str
url: URL
mounted_repo: str = ""

_v2_path_re: ClassVar[Pattern[str]] = re.compile(
r"/v2/(?P<repo>.+)/(?P<path_suffix>(tags|manifests|blobs)/.*)"
)
# Path patterns for URLs used exclusively by Google Artifact Registry (GAR).
# These URLs are sent without Authorization headers, so we cannot check
# permissions. We just need to proxy such requests to the upstream without
# modifying the URL.
# === Examples: ===
# /artifacts-uploads/namespaces/development-421920/
# repositories/platform-registry-dev/uploads/AF2XiV ...
# /v2/development-421920/platform-registry-dev/pkg/blobs/uploads/AJMTJPA ...
#
# Declared as ClassVar so that the enclosing dataclass does not turn this
# constant into an instance field (extra __init__ parameter, part of
# repr/eq), matching how `_v2_path_re` is declared.
#
# NOTE(review): the second pattern matches any `/v2/<a>/<b>/pkg/blobs/...`
# path, so a regular image whose name ends in `/pkg` with at least two
# leading segments would also be treated as permission-exempt -- confirm
# this is acceptable.
_allowed_skip_perms_path_re: ClassVar[tuple[Pattern[str], Pattern[str]]] = (
    re.compile(
        r"^/(artifacts-uploads|artifacts-downloads)/namespaces/(?P<project>.+)/"
        r"repositories/(?P<repo>.+)/(?P<path_suffix>(uploads|downloads))/"
        r"(?P<upload_id>[A-Za-z0-9_=-]+)"
    ),
    re.compile(r"^/v2/(?P<project>.+)/(?P<repo>.+)/pkg/(?P<path_suffix>blobs/.+)"),
)

@staticmethod
def _get_match_skip_perms_path_re(url: URL) -> Optional[re.Match[str]]:
    """Find the permission-exempt pattern that fully matches ``url.path``.

    The patterns in ``RepoURL._allowed_skip_perms_path_re`` are tried in
    declaration order.

    Returns:
        The match object of the first pattern that fully matches the path,
        or ``None`` when no pattern matches.
    """
    attempts = (
        pattern.fullmatch(url.path)
        for pattern in RepoURL._allowed_skip_perms_path_re
    )
    # The generator is lazy, so patterns after the first hit are never tried.
    return next((found for found in attempts if found), None)

@classmethod
def from_url(cls, url: URL) -> "RepoURL":
# validating the url
Expand All @@ -116,7 +139,13 @@ def from_url(cls, url: URL) -> "RepoURL":

@classmethod
def _parse(cls, url: URL) -> tuple[str, str, URL]:
match = cls._path_re.fullmatch(url.path)
if match := cls._get_match_skip_perms_path_re(url):
return (
f"{match.group('project')}/{match.group('repo')}",
"",
URL(match.group("path_suffix")),
)
match = cls._v2_path_re.fullmatch(url.path)
if not match:
raise ValueError(f"unexpected path in a registry URL: {url}")
path_suffix = URL.build(path=match.group("path_suffix"), query=url.query)
Expand All @@ -127,6 +156,9 @@ def _parse(cls, url: URL) -> tuple[str, str, URL]:
mounted_repo = path_suffix.query["from"]
return match.group("repo"), mounted_repo, path_suffix

def allow_skip_perms(self) -> bool:
    """Tell whether this URL matches one of the permission-exempt patterns.

    Returns:
        ``True`` when ``_get_match_skip_perms_path_re`` finds a match for
        ``self.url``, ``False`` otherwise.
    """
    # The helper returns either a match object or None; an explicit
    # None check replaces the redundant ``True if ... else False``.
    return self._get_match_skip_perms_path_re(self.url) is not None

def with_project(
self, project: str, upstream_repo: Optional[str] = None
) -> "RepoURL":
Expand Down Expand Up @@ -211,9 +243,12 @@ def create_registry_catalog_url(self, query: dict[str, str]) -> URL:
return self._registry_endpoint_url.with_path("/v2/_catalog").with_query(query)

def create_upstream_repo_url(self, registry_url: RepoURL) -> RepoURL:
return registry_url.with_project(
self._upstream_project, self._upstream_repo
).with_origin(self._upstream_endpoint_url)
if registry_url.allow_skip_perms():
return registry_url.with_origin(self._upstream_endpoint_url)
else:
return registry_url.with_project(
self._upstream_project, self._upstream_repo
).with_origin(self._upstream_endpoint_url)

def create_registry_repo_url(self, upstream_url: RepoURL) -> RepoURL:
upstream_repo = upstream_url.repo
Expand Down Expand Up @@ -269,6 +304,24 @@ def register(self, app: aiohttp.web.Application) -> None:
)
)

def register_artifacts(self, app: aiohttp.web.Application) -> None:
    """Add the ``/artifacts-uploads`` and ``/artifacts-downloads`` routes to ``app``.

    One route per supported HTTP method is registered, and all of them are
    served by ``self.handle``.
    """
    artifacts_path = (
        r"/artifacts-{action:(uploads|downloads)}/namespaces/{project:.+}/"
        r"repositories/{repo:.+}/{path_suffix:(uploads|downloads)/?.*}"
    )
    http_methods = (
        METH_HEAD,
        METH_GET,
        METH_POST,
        METH_DELETE,
        METH_PATCH,
        METH_PUT,
    )
    routes = [
        aiohttp.web.route(http_method, artifacts_path, self.handle)
        for http_method in http_methods
    ]
    app.add_routes(routes)

def _create_url_factory(self, request: Request) -> URLFactory:
return URLFactory.from_config(
registry_endpoint_url=request.url.origin(), config=self._config
Expand Down Expand Up @@ -626,20 +679,21 @@ async def handle(self, request: Request) -> StreamResponse:

registry_repo_url = RepoURL.from_url(request.url)

permissions = [
Permission(
uri=self._create_image_uri(registry_repo_url.repo),
action="read" if self._is_pull_request(request) else "write",
)
]
if registry_repo_url.mounted_repo:
permissions.append(
if not registry_repo_url.allow_skip_perms():
permissions = [
Permission(
uri=self._create_image_uri(registry_repo_url.mounted_repo),
action="read",
uri=self._create_image_uri(registry_repo_url.repo),
action="read" if self._is_pull_request(request) else "write",
)
)
await self._check_user_permissions(request, permissions)
]
if registry_repo_url.mounted_repo:
permissions.append(
Permission(
uri=self._create_image_uri(registry_repo_url.mounted_repo),
action="read",
)
)
await self._check_user_permissions(request, permissions)

url_factory = self._create_url_factory(request)
upstream_repo_url = url_factory.create_upstream_repo_url(registry_repo_url)
Expand Down Expand Up @@ -703,7 +757,6 @@ async def _proxy_request(
auth_headers: dict[str, str],
) -> StreamResponse:
request_headers = self._prepare_request_headers(request.headers, auth_headers)

timeout = self._create_registry_client_timeout(request)

if request.method == "HEAD":
Expand Down Expand Up @@ -874,7 +927,12 @@ def _convert_location_header(self, url_str: str, url_factory: URLFactory) -> str
and url_raw.host != url_factory.registry_host
):
return url_str # Redirect to outer service, maybe AWS S3 redirect

upstream_repo_url = RepoURL.from_url(URL(url_str))

if upstream_repo_url.allow_skip_perms():
return url_str

registry_repo_url = url_factory.create_registry_repo_url(upstream_repo_url)
logger.info(
"converted upstream repo URL to registry repo URL: %s -> %s",
Expand Down Expand Up @@ -988,7 +1046,7 @@ async def on_request_redirect(
v2_app = aiohttp.web.Application()
v2_handler = V2Handler(app=v2_app, config=config)
v2_handler.register(v2_app)

v2_handler.register_artifacts(app)
app["v2_app"] = v2_app
app.add_subapp("/v2", v2_app)

Expand Down
101 changes: 88 additions & 13 deletions tests/unit/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,19 +28,68 @@

class TestRepoURL:
@pytest.mark.parametrize(
"url", (URL("/"), URL("/v2/"), URL("/v2/tags/list"), URL("/v2/blobs/uploads/"))
"url",
(
URL("/"),
URL("/v2/"),
URL("/v2/tags/list"),
URL("/v2/blobs/uploads/"),
URL("/artifacts-uploads/repo/uploads/"),
URL("/project/pkg/blobs/"),
URL("/project/repo/pkg/uploads/"),
),
)
def test_from_url_value_error(self, url: URL) -> None:
with pytest.raises(
ValueError, match=f"unexpected path in a registry URL: {url}"
):
RepoURL.from_url(url)

def test_from_url(self) -> None:
@pytest.mark.parametrize(
    "url, expected",
    [
        # Paths matching the permission-exempt patterns.
        (URL("/v2/project/repo/pkg/blobs/uploads/smth"), True),
        (
            URL(
                "/artifacts-uploads/namespaces/project-name/"
                "repositories/repo-name/uploads/smth"
            ),
            True,
        ),
        # Regular /v2 paths that must still go through permission checks.
        (URL("/v2/project/pkg/blobs/uploads/smth"), False),
        (URL("/v2/this/img/blobs/uploads/?what=ever&from=another/img"), False),
    ],
)
def test_allow_skip_perms(self, url: URL, expected: bool) -> None:
    """Check which parsed URLs report themselves as permission-exempt."""
    parsed = RepoURL.from_url(url)
    skip_allowed = parsed.allow_skip_perms()
    assert skip_allowed == expected

def test_from_url_v2(self) -> None:
    """A plain ``/v2/<repo>/tags/list`` URL parses into that repo name."""
    source = URL("https://example.com/v2/name/tags/list?whatever=thatis")
    parsed = RepoURL.from_url(source)
    expected = RepoURL(repo="name", url=source)
    assert parsed == expected

@pytest.mark.parametrize(
    "url, expected_repo",
    [
        # Regular /v2 URL: the repo is the path between /v2/ and the suffix.
        (URL("https://example.com/v2/name/tags/list?whatever=thatis"), "name"),
        # Permission-exempt URLs: the repo is "<project>/<repo>".
        (
            URL(
                "/artifacts-uploads/namespaces/project-name/"
                "repositories/repo-name/uploads/docker-upload-blob"
            ),
            "project-name/repo-name",
        ),
        (
            URL("/v2/project-name/repo-name/pkg/blobs/uploads/smth"),
            "project-name/repo-name",
        ),
    ],
)
def test_from_url(self, url: URL, expected_repo: str) -> None:
    """Check the repo name extracted from each supported URL shape."""
    parsed = RepoURL.from_url(url)
    expected = RepoURL(repo=expected_repo, url=url)
    assert parsed == expected

def test_from_url_edge_case_1(self) -> None:
url = URL("/v2/tags/tags/list?whatever=thatis")
reg_url = RepoURL.from_url(url)
Expand Down Expand Up @@ -141,18 +190,44 @@ def test_create_registry_version_check_url(self, url_factory: URLFactory) -> Non
"http://upstream:5000/v2/"
)

def test_create_upstream_repo_url(self, url_factory: URLFactory) -> None:
reg_repo_url = RepoURL.from_url(
URL("http://registry:5000/v2/this/image/tags/list?what=ever")
)
@pytest.mark.parametrize(
"reg_url, expected_repo, expected_url",
[
(
URL("http://registry:5000/v2/this/image/tags/list?what=ever"),
"upstream/nested/this/image",
URL(
"http://upstream:5000/v2/upstream/nested/this/image/tags/"
"list?what=ever"
),
),
(
URL(
"/artifacts-uploads/namespaces/proj/repositories/repo/uploads/blob"
),
"proj/repo",
URL(
"http://upstream:5000/artifacts-uploads/namespaces/proj/"
"repositories/repo/uploads/blob"
),
),
(
URL("/v2/proj/repo/pkg/blobs/uploads/smth"),
"proj/repo",
URL("http://upstream:5000/v2/proj/repo/pkg/blobs/uploads/smth"),
),
],
)
def test_create_upstream_repo_url(
self,
url_factory: URLFactory,
reg_url: URL,
expected_repo: str,
expected_url: URL,
) -> None:
reg_repo_url = RepoURL.from_url(reg_url)
up_repo_url = url_factory.create_upstream_repo_url(reg_repo_url)

expected_url = URL(
"http://upstream:5000/v2/upstream/nested/this/image/tags/list" "?what=ever"
)
assert up_repo_url == RepoURL(
repo="upstream/nested/this/image", url=expected_url
)
assert up_repo_url == RepoURL(repo=expected_repo, url=expected_url)

def test_create_registry_repo_url(self, url_factory: URLFactory) -> None:
up_repo_url = RepoURL.from_url(
Expand Down