Source code for project_config.fetchers.github
"""Github files resources URIs fetcher."""
from __future__ import annotations
import base64
import json
import os
import re
import urllib.parse
from enum import Enum
from typing import Any
from project_config import __version__
from project_config.utils.http import GET
SEMVER_REGEX = r"\d+\.\d+\.\d+"
[docs]class AcceptHeader(Enum):
"""Accept header values for Github API."""
JSON = "application/vnd.github+json"
[docs]def _github_headers(accept: AcceptHeader | None = None) -> dict[str, str]:
headers = {
"X-GitHub-Api-Version": "2022-11-28",
"User-Agent": f"project-config v{__version__}",
}
if accept == AcceptHeader.JSON:
headers["Accept"] = "application/vnd.github+json"
github_token = os.environ.get("GITHUB_TOKEN")
if github_token:
headers["Authorization"] = f"Bearer {github_token}"
return headers
[docs]def _build_github_api_url(
repo_owner: str,
repo_name: str,
git_reference: str | None,
fpath: str,
) -> str:
query_parameters = "" if not git_reference else f"?ref={git_reference}"
return (
f"https://api.github.com/repos/{repo_owner}/{repo_name}"
f"/contents/{fpath}{query_parameters}"
)
[docs]def resolve_url(url_parts: urllib.parse.SplitResult) -> str:
"""Resolve a ``gh:`` scheme URI to their real counterpart.
Args:
url_parts (urllib.parse.SplitResult): The URL parts of the URI.
Returns:
str: The real ``https:`` scheme URL.
"""
# extract project, filepath and git reference
project_maybe_with_gitref, fpath = url_parts.path.lstrip("/").split(
"/",
maxsplit=1,
)
if "@" in project_maybe_with_gitref:
project, git_reference = project_maybe_with_gitref.split("@")
else:
project, git_reference = (project_maybe_with_gitref, None)
return _build_github_api_url(
url_parts.netloc,
project,
git_reference,
fpath,
)
[docs]def fetch(url_parts: urllib.parse.SplitResult, **kwargs: Any) -> Any:
"""Fetch a resource through HTTPs protocol for a Github URI.
Args:
url_parts (urllib.parse.SplitResult): The URL parts of the URI.
**kwargs (Any): The keyword arguments to pass to the ``GET`` function.
Returns:
str: The fetched resource content.
"""
if "headers" not in kwargs:
kwargs["headers"] = {}
kwargs["headers"].update(_github_headers(accept=AcceptHeader.JSON))
response = json.loads(GET(resolve_url(url_parts), **kwargs))
if "content" in response:
return base64.b64decode(response["content"]).decode("utf-8")
return response
[docs]def get_latest_release_tags(
repo_owner: str,
repo_name: str,
only_semver: bool = False, # noqa: FBT001, FBT002
) -> list[str]:
"""Get the latest release tag of a Github repository.
Args:
repo_owner (str): The Github repository owner.
repo_name (str): The Github repository name.
only_semver (bool): If True, only return a tag if it is a semver tag.
Returns:
str: The latest release tag.
"""
result = GET(
f"https://github.com/{repo_owner}/{repo_name}/tags",
headers=_github_headers(),
)
regex = (
rf'/{re.escape(repo_owner)}/{re.escape(repo_name)}/releases/tag/([^"]+)'
)
response = []
tags = re.findall(regex, result)
for tag in tags:
if tag in response:
continue
cleaned_tag = re.sub("^[a-zA-Z-]+", "", tag)
if not cleaned_tag:
continue
if only_semver and not re.match(SEMVER_REGEX, cleaned_tag):
continue
response.append(tag)
return response