Source code for project_config.tree

"""File system tree API."""

from __future__ import annotations

import contextlib
import functools
import os
import stat
from collections.abc import Iterable
from typing import Any
from urllib.parse import SplitResult

from project_config.cache import Cache
from project_config.fetchers import (
    download_file_from_urlsplit_scheme,
    urlsplit_with_scheme,
)
from project_config.serializers import (
    SerializerError,
    deserialize_for_url,
    guess_preferred_serializer,
    serialize_for_url,
)
from project_config.utils.crypto import hash_file


# Names exported by ``from project_config.tree import *``.
__all__ = (
    "cache_file",
    "cached_local_file",
    "fetch_remote_file",
    "edit_local_file",
)


# Zero-argument factory: calling it returns a new
# ``contextlib.suppress(SerializerError)`` context manager, so it can be
# used interchangeably with ``contextlib.nullcontext`` in a ``with`` statement.
IGNORE_SERIALIZATION_ERRORS_CTX = functools.partial(
    contextlib.suppress,
    SerializerError,
)


def _split_fname_preferred_serializer(
    fpath: str,
) -> tuple[str, str | None]:
    preferred_serializer: str | None = None
    fname = fpath
    if "?" in fpath:
        fname, preferred_serializer = fpath.split("?", maxsplit=1)
        # check if the serializer is really an URL with arguments after '?'
        if "&" in preferred_serializer or "=" in preferred_serializer:
            preferred_serializer = None
            fname = fpath
    return (fname, preferred_serializer)


def _split_fpath_parts(
    fpath: str,
) -> tuple[str, str | None, SplitResult, str]:
    """Break a path or URL into the pieces needed to fetch and cache it.

    Args:
        fpath (str): The file path or URL, optionally followed by
            ``?`` and a serializer name.

    Returns:
        tuple: The file name without the serializer suffix, the serializer
        name found after ``?`` (or ``None``), and the two values returned
        by ``urlsplit_with_scheme`` for the file name (the split URL and
        its scheme).
    """
    name_and_serializer = _split_fname_preferred_serializer(fpath)
    fname, preferred_serializer = name_and_serializer
    uri_parts, scheme = urlsplit_with_scheme(fname)
    return (fname, preferred_serializer, uri_parts, scheme)


def _add_missing_serializations(
    fname: str,
    cache_value: dict[str, Any],
    serializers: Iterable[str],
    *,
    ignore_serialization_errors: bool,
) -> bool:
    """Add to a cache entry the serialized versions it does not have yet.

    Args:
        fname (str): The file path or URL, forwarded to
            ``serialize_for_url``.
        cache_value (dict): The cache entry of the file. It must contain
            the ``"_plain"`` key and is updated in place.
        serializers (Iterable[str]): The serializers to apply.
        ignore_serialization_errors (bool): If True, a ``SerializerError``
            is suppressed. The suppression wraps the whole loop, so the
            first error also skips the remaining serializers.

    Returns:
        bool: True if at least one serialized version has been added.
    """
    serialization_context = (
        IGNORE_SERIALIZATION_ERRORS_CTX
        if ignore_serialization_errors
        else contextlib.nullcontext
    )
    changed = False
    with serialization_context():  # type: ignore
        for serializer in serializers:
            if serializer not in cache_value:
                cache_value[serializer] = serialize_for_url(
                    fname,
                    cache_value["_plain"],
                    prefer_serializer=serializer,
                )
                changed = True
    return changed


def cache_file(
    fpath: str,
    serializers: list[str] | None = None,
    forbid_serializers: Iterable[str] | None = None,
    ignore_serialization_errors: bool = False,  # noqa: FBT001, FBT002
) -> None:
    """Cache the file content and its serialized versions.

    If the file is local, the cache key is the value returned by
    ``hash_file`` for it. If the file is remote, the cache key is the
    file URL (without the optional ``?serializer`` suffix).

    Local paths that do not exist or that are directories are skipped
    silently.

    Args:
        fpath (str): The file path or URL.
        serializers (list[str], optional): The serializers to use. When
            omitted, the serializer given after ``?`` in ``fpath`` is
            used or, failing that, the one guessed for the file name.
            The passed list is never modified.
        forbid_serializers (Iterable[str], optional): The serializers
            to forbid. Only makes sense for serializers guessed for
            the file.
        ignore_serialization_errors (bool, optional): If True, ignore
            serialization errors.
    """
    fname, preferred_serializer, uri_parts, scheme = _split_fpath_parts(fpath)

    if serializers is None:
        if preferred_serializer is None:
            preferred_serializer = guess_preferred_serializer(fname)[1]
        requested = [] if preferred_serializer is None else [preferred_serializer]
    else:
        # work on a de-duplicated copy (order preserved) so the caller's
        # list is left untouched and nothing is serialized twice
        requested = list(dict.fromkeys(serializers))

    # some serializers are forbidden in some calls, like the Python
    # one when we are precaching a Python file before executing
    # plugins
    if forbid_serializers:
        forbidden = set(forbid_serializers)
        requested = [name for name in requested if name not in forbidden]

    cache_value: dict[str, Any] | None
    if scheme == "file":
        try:
            fstat = os.stat(fname)
        except FileNotFoundError:
            # the file does not exist, skip caching
            return
        if stat.S_ISDIR(fstat.st_mode):
            # the file is a directory, skip caching
            return

        # use hashes for files with multiple serializers
        cache_key = hash_file(fname)
        cache_value = Cache.get(cache_key)
        is_new_entry = cache_value is None
        if is_new_entry:
            with open(fname, encoding="utf-8") as f:
                cache_value = {"_plain": f.read()}
    else:
        cache_key = fname
        cache_value = Cache.get(cache_key)
        is_new_entry = cache_value is None
        if is_new_entry:
            # TODO: What happens trying to download a directory?
            cache_value = {
                "_plain": download_file_from_urlsplit_scheme(
                    fname,
                    uri_parts,
                    scheme,
                ),
            }

    changed = _add_missing_serializations(
        fname,
        cache_value,  # type: ignore
        requested,
        ignore_serialization_errors=ignore_serialization_errors,
    )
    # a new entry is always stored; an existing one only when it gained
    # a serialized version
    if is_new_entry or changed:
        Cache.set(cache_key, cache_value)
def cached_local_file(
    fpath: str,
    serializer: str | None = None,
) -> Any:
    """Return the content of a local file, reading it through the cache.

    Args:
        fpath (str): The file path, optionally followed by ``?`` and a
            serializer name.
        serializer (str, optional): The serializer to use reading the
            file. When omitted, the one given after ``?`` in ``fpath``
            is used or, failing that, the one guessed for the file name.

    Returns:
        Any: The cached value stored for the selected serializer.
    """
    fname, suffix_serializer = _split_fname_preferred_serializer(fpath)
    if serializer is None:
        serializer = (
            suffix_serializer
            if suffix_serializer is not None
            else guess_preferred_serializer(fname)[1]
        )

    cache_key = hash_file(fname)
    entry: dict[str, str] | None = Cache.get(cache_key)

    if entry is None:
        # A file could be requested but is not inside `files`
        # object, so it is not cached yet: cache it and retry
        wanted = [] if serializer in ("_plain", None) else [serializer]
        cache_file(fpath, serializers=wanted)
        return cached_local_file(fpath, serializer=serializer)

    if serializer in entry:
        return entry[serializer]

    entry[serializer] = serialize_for_url(
        fname,
        entry["_plain"],
        prefer_serializer=serializer,
    )

    # Don't serialize Python files because some types like
    # modules can't be serialized by pickle
    #
    # TODO: Manage this in a better way
    result = entry.pop("py") if serializer == "py" else entry[serializer]
    Cache.set(cache_key, entry)
    return result
def fetch_remote_file(
    uri: str,
    serializer: str | None = None,
) -> Any:
    """Fetch the remote file content.

    URIs with ``file`` scheme are delegated to ``cache_file`` and
    ``cached_local_file``. Any other URI is downloaded once and stored
    in the cache using the URI (without the optional ``?serializer``
    suffix) as key.

    Args:
        uri (str): The file uri.
        serializer (str, optional): The serializer to use. When omitted,
            the serializer given after ``?`` in ``uri`` is used or,
            failing that, the one guessed for the file name.

    Returns:
        Any: The cached value stored for the selected serializer.
    """
    fname, preferred_serializer, uri_parts, scheme = _split_fpath_parts(uri)

    if scheme == "file":
        cache_file(uri)
        # forward the explicit serializer instead of silently dropping it
        return cached_local_file(uri, serializer=serializer)

    if serializer is None:
        if preferred_serializer is None:
            # keep the guessed serializer (it used to be overwritten
            # with None right after being guessed)
            preferred_serializer = guess_preferred_serializer(fname)[1]
        serializer = preferred_serializer

    cache_value: dict[str, Any] | None = Cache.get(fname)
    changed = False
    if cache_value is None:
        cache_value = {
            "_plain": download_file_from_urlsplit_scheme(
                fname,
                uri_parts,
                scheme,
            ),
        }
        changed = True

    if serializer not in cache_value:
        # always serialize from the cached plain content: there is no
        # freshly downloaded content when the URI was already cached
        cache_value[serializer] = serialize_for_url(  # type: ignore
            fname,
            cache_value["_plain"],
            prefer_serializer=serializer,
        )
        changed = True

    if changed:
        Cache.set(fname, cache_value)
    return cache_value[serializer]  # type: ignore
def edit_local_file(fpath: str, new_content: Any) -> bool:
    """Edit the local file and update the cache.

    The file is only rewritten when the text produced for ``new_content``
    differs from the text currently cached for the file.

    Args:
        fpath (str): The file path.
        new_content (Any): The new object to serialize.

    Returns:
        bool: True if the file has been rewritten, False if its content
        was already up to date.
    """
    fpath, preferred_serializer = guess_preferred_serializer(fpath)

    # Compare text against text: request the plain cached content, not
    # the object built by the guessed serializer, which would never be
    # equal to the new content string.
    previous_content_string = cached_local_file(fpath, serializer="_plain")
    new_content_string = deserialize_for_url(
        fpath,
        new_content,
        prefer_serializer=preferred_serializer,
    )
    if previous_content_string == new_content_string:
        return False

    with open(fpath, "w", encoding="utf-8") as f:
        f.write(new_content_string)

    # cache the rewritten file along with its serialized version
    cache_file(
        fpath,
        serializers=(
            [preferred_serializer] if preferred_serializer is not None else []
        ),
    )
    return True