"""Github files resources URIs fetcher."""
import json
import re
import typing as t
import urllib.parse
from project_config.utils.http import GET
def _get_default_branch_from_repo_branches_html(
    repo_owner: str,
    repo_name: str,
) -> t.Optional[str]:
    """Read the default branch of a repository from its branches HTML page.

    Args:
        repo_owner: Owner (user or organization) of the repository.
        repo_name: Name of the repository.

    Returns:
        The value of the first quoted ``branch=`` attribute found in the
        page, or ``None`` when the page contains no such attribute.
    """
    # try from repository HTML
    result = GET(f"https://github.com/{repo_owner}/{repo_name}/branches")
    # NOTE(review): presumably the first ``branch="..."`` attribute on the
    # page belongs to the default branch -- confirm against the current
    # GitHub markup.
    #
    # The value is captured up to the closing quote instead of with ``\w+``
    # because branch names may contain ``-``, ``.`` or ``/``; with ``\w+``
    # such a branch was skipped and a later, non-default branch could be
    # returned. The backreference requires the closing quote to match the
    # opening one (the previous ``["|\']`` class also accepted a literal
    # ``|`` as a delimiter).
    match = re.search(r'branch=(["\'])([^"\'\s]+)\1', result)
    return match.group(2) if match else None
def _get_default_branch_from_repo_github_api(
    repo_owner: str,
    repo_name: str,
) -> str:  # pragma: no cover
    """Read the default branch of a repository from the GitHub REST API.

    Args:
        repo_owner: Owner (user or organization) of the repository.
        repo_name: Name of the repository.

    Returns:
        The ``default_branch`` field of the repository's JSON description.
    """
    # API-based lookup.
    #
    # Excluded from test coverage on purpose: the HTML-based lookup is the
    # one expected to succeed, and this function only serves as a fallback.
    # Being a fallback, it may still run into the API usage limit.
    #
    # Should the HTML-based lookup become unreliable, the handling of the
    # API rate limit should be improved by using a Github token.
    api_url = f"https://api.github.com/repos/{repo_owner}/{repo_name}"
    repo_metadata = json.loads(GET(api_url))
    return repo_metadata["default_branch"]  # type: ignore
def _get_default_branch_from_git_repo(
    repo_owner: str,
    repo_name: str,
) -> str:  # pragma: no cover
    """Resolve the default branch of a GitHub repository.

    The branches HTML page is consulted first; the GitHub API is only
    queried when that lookup yields no (or an empty) branch name.

    Args:
        repo_owner: Owner (user or organization) of the repository.
        repo_name: Name of the repository.

    Returns:
        The default branch name reported by the first lookup that succeeds.
    """
    branch_from_html = _get_default_branch_from_repo_branches_html(
        repo_owner,
        repo_name,
    )
    if branch_from_html:
        return branch_from_html
    return _get_default_branch_from_repo_github_api(repo_owner, repo_name)
def _build_raw_githubusercontent_url(
    repo_owner: str,
    repo_name: str,
    git_reference: str,
    fpath: str,
) -> str:
    """Compose the raw.githubusercontent.com URL of a repository file.

    Args:
        repo_owner: Owner (user or organization) of the repository.
        repo_name: Name of the repository.
        git_reference: Git reference placed in the URL after the
            repository name.
        fpath: Path of the file, appended after the git reference.

    Returns:
        The URL built by joining the four parts with ``/`` under the
        ``https://raw.githubusercontent.com`` host.
    """
    base_url = "https://raw.githubusercontent.com"
    return f"{base_url}/{repo_owner}/{repo_name}/{git_reference}/{fpath}"
def fetch(url_parts: urllib.parse.SplitResult, **kwargs: t.Any) -> str:
    """Fetch a resource through HTTPs protocol for a Github URI.

    The network location of the URI is taken as the repository owner and
    its path must have the form
    ``/<project>[@<git-reference>]/<file-path>``. When no git reference is
    given, the default branch of the repository is looked up and used.

    Args:
        url_parts: The already split URI of the resource.
        **kwargs: Extra keyword arguments forwarded unchanged to ``GET``.

    Returns:
        The result of ``GET`` for the matching raw.githubusercontent.com
        URL.

    Raises:
        ValueError: If the URI path does not contain both a project and a
            file path separated by ``/``.
    """
    # extract project, filepath and git reference
    project_maybe_with_gitref, separator, fpath = url_parts.path.lstrip(
        "/",
    ).partition("/")
    if not separator:
        # Previously this surfaced as an opaque tuple-unpacking ValueError;
        # the exception type is kept so existing handlers still apply.
        raise ValueError(
            "Expected a Github URI path of the form"
            " '/<project>[@<git-reference>]/<file-path>',"
            f" got '{url_parts.path}'",
        )

    # Split on the first "@" only, so a git reference that itself contains
    # "@" (for example a tag such as "pkg@1.0.0") is kept whole instead of
    # breaking the unpacking.
    project, at_sign, git_reference = project_maybe_with_gitref.partition(
        "@",
    )
    if not at_sign:
        git_reference = _get_default_branch_from_git_repo(
            url_parts.netloc,  # netloc is the repo owner here
            project,
        )

    url = _build_raw_githubusercontent_url(
        url_parts.netloc,
        project,
        git_reference,
        fpath,
    )
    return GET(url, **kwargs)