Source code for project_config.tree

"""Cached files tree used by the linter when using checker commands."""

from __future__ import annotations

import glob
import os
import typing as t
from dataclasses import dataclass

from project_config.fetchers import fetch
from project_config.serializers import (
    deserialize_for_url,
    guess_preferred_serializer,
    serialize_for_url,
)


TreeDirectory = t.Iterator[str]
TreeNode = t.Union[str, TreeDirectory]
TreeNodeFiles = t.List[t.Tuple[str, TreeNode]]
TreeNodeFilesIterator = t.Iterator[t.Tuple[str, TreeNode]]
FilePathsArgument = t.Union[t.Iterator[str], t.List[str]]


@dataclass
class Tree:
    """Files cache used by the linter in checking processes.

    It represents the tree of files and directories starting at the
    root directory of the project.

    Instances of :py:class:`project_config.tree.Tree` can be iterated
    with:

    .. code-block:: python

        for fpath, fcontent in tree.files:
            if fcontent is None:
                # file does not exist
                ...
            elif not isinstance(fcontent, str):
                # file is a directory
                #
                # so `fcontent` is an iterator of nested files here
                for nested_fpath, nested_fcontent in fcontent:
                    ...

    If you want to get the serialized version of a file you can use the
    method :py:meth:`project_config.tree.Tree.serialize_file`:

    .. code-block:: python

        fpath, instance = tree.serialize_file(fpath)

    If you are not inside a context where you have the content of the
    files (a common scenario for conditional actions) you can get it by
    calling the method
    :py:meth:`project_config.tree.Tree.get_file_content`:

    .. code-block:: python

        fcontent = tree.get_file_content(fpath)

    This class caches file contents along with their serialized
    versions, so subsequent accesses to the same files in the project
    tree are fast.

    Args:
        rootdir (str): Root directory of the project.
    """

    rootdir: str

    def __post_init__(self) -> None:
        # cache for all files
        #
        # TODO: this type becomes recursive; in the future, define it
        # properly (https://github.com/python/mypy/issues/731)
        self.files_cache: t.Dict[str, t.Tuple[bool, t.Optional[str]]] = {}

        # cache for serialized versions of files
        #
        # JSON-encodable versions of files are cached here to avoid
        # multiple calls to the serializer for the same file
        self.serialized_files_cache: t.Dict[str, str] = {}

        # latest cached files
        self._files: TreeNodeFiles = []

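    # Example usage sketch (the root directory and file names below are
    # assumptions for illustration, not taken from any real project):
    #
    #     tree = Tree(rootdir="/path/to/project")
    #     tree.cache_files(["pyproject.toml", "docs/*.rst"])
    #     for fpath, fcontent in tree.files:
    #         print(fpath, "missing" if fcontent is None else "cached")
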
    def normalize_path(self, fpath: str) -> str:
        """Normalize a path given its relative path to the root directory.

        Args:
            fpath (str): Path to the file relative to the root directory.

        Returns:
            str: Normalized absolute path.
        """
        return os.path.join(self.rootdir, fpath)

    def _cache_file(self, fpath: str) -> str:
        """Cache a file, normalizing its path.

        Args:
            fpath (str): Relative path from the root directory.

        Returns:
            str: Normalized absolute path.
        """
        normalized_fpath = self.normalize_path(fpath)
        if os.path.isfile(normalized_fpath):
            with open(normalized_fpath, encoding="utf-8") as f:
                self.files_cache[normalized_fpath] = (False, f.read())
        elif os.path.isdir(normalized_fpath):
            # recursive generation
            self.files_cache[normalized_fpath] = (  # type: ignore
                True,
                self._generator(
                    self.normalize_path(fname)
                    for fname in os.listdir(normalized_fpath)
                ),
            )
        else:
            # file or directory does not exist
            self.files_cache[normalized_fpath] = (False, None)
        return normalized_fpath

    def _generator(
        self,
        fpaths: FilePathsArgument,
    ) -> t.Iterable[t.Tuple[str, t.Optional[str]]]:
        for fpath_or_glob in fpaths:
            # try to get all existing files from the glob
            #
            # note that when a glob does not match any files, it is
            # because the file does not exist, so the generator will
            # yield it as is, which would lead to a nonexistent file
            # error when a user specifies a glob that does not match
            # any files
            fpaths_from_glob = glob.glob(fpath_or_glob)
            if fpaths_from_glob:
                for fpath in fpaths_from_glob:
                    yield self.normalize_path(fpath), self.files_cache[
                        self._cache_file(fpath)
                    ][1]
            else:
                yield self.normalize_path(fpath_or_glob), self.files_cache[
                    self._cache_file(fpath_or_glob)
                ][1]

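    # Illustration of the glob handling above (the paths are assumed):
    # given ``["src/*.py", "missing.txt"]``, each existing match of
    # ``src/*.py`` is yielded with its cached content, while
    # ``missing.txt`` matches nothing, so it is yielded as is with
    # ``None`` as its content and can be reported as nonexistent.
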
    def get_file_content(self, fpath: str) -> str:
        """Return the content of a file given its relative path.

        This method is typically used by ``if`` plugin action
        conditionals to get the content of files that are not defined
        in ``files`` subject rule fields.

        Args:
            fpath (str): Path to the file relative to the root directory.
        """
        return self.files_cache[self._cache_file(fpath)][1]  # type: ignore

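    # Example sketch for an ``if`` conditional (the file name is an
    # assumption):
    #
    #     fcontent = tree.get_file_content(".editorconfig")
    #     if fcontent is None:
    #         ...  # the file does not exist in the project
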
    def cache_files(self, fpaths: t.List[str]) -> None:
        """Cache a set of files given their paths.

        Args:
            fpaths (list): Paths to the files to store in the cache.
        """
        self._files = list(self._generator(fpaths))  # type: ignore

        for fpath, _content in self._files:
            if _content is None:
                if fpath in self.serialized_files_cache:
                    self.serialized_files_cache.pop(fpath)

    @property
    def files(self) -> t.List[t.Tuple[str, str]]:
        """Return an array of the currently cached files for a rule action.

        Returns:
            list: Array of tuples with the path to the file relative to
                ``rootdir`` as the first item and the content of the
                file as the second one.
        """
        result = []
        for fpath, _content in self._files:
            result.append(
                (
                    os.path.relpath(fpath, self.rootdir)
                    + ("/" if fpath.endswith("/") else ""),
                    _content,
                ),
            )
        return result  # type: ignore

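    # Example sketch: with ``rootdir="/path/to/project"`` (an assumed
    # value), a cached ``/path/to/project/pyproject.toml`` is returned
    # here as ``("pyproject.toml", "<file content>")``; paths that were
    # requested with a trailing slash keep that slash in their relative
    # form.
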
    def serialize_file(self, fpath: str) -> t.Any:
        """Return the object-serialized version of a file.

        This method is a convenient cache wrapper for
        :py:func:`project_config.serializers.serialize_for_url`. It is
        used by plugin actions which need an object-serialized version
        of files to perform operations against them, like the
        :ref:`reference/plugins:jmespath` one.

        Args:
            fpath (str): Path to the file to serialize.

        Returns:
            tuple: Path to the file and its object-serialized version.
        """
        fpath, serializer_name = guess_preferred_serializer(fpath)

        normalized_fpath = self.normalize_path(fpath)
        try:
            result = self.serialized_files_cache[normalized_fpath]
        except KeyError:
            fcontent = self.get_file_content(fpath)
            if fcontent is None:
                raise FileNotFoundError(
                    f"No such file or directory: '{fpath}'",
                )
            result = serialize_for_url(
                fpath,
                fcontent,
                prefer_serializer=serializer_name,
            )
            self.serialized_files_cache[normalized_fpath] = result
        return fpath, result

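    # Example sketch (the file name is an assumption):
    #
    #     fpath, instance = tree.serialize_file("pyproject.toml")
    #     # ``instance`` is the decoded object, e.g. a dictionary for a
    #     # TOML or JSON file, taken from the cache on repeated calls
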
    def fetch_file(self, url: str) -> t.Any:
        """Fetch a file from online or offline sources given a URL or path.

        This method is a convenient cache wrapper for
        :py:func:`project_config.fetchers.fetch`. It is used by plugin
        actions which need an object-serialized version of files to
        perform operations against them, like the
        :ref:`reference/plugins:jmespath` one.

        Args:
            url (str): URL or path to the file to fetch.

        Returns:
            object: Object-serialized version of the file.
        """
        try:
            result = self.serialized_files_cache[url]
        except KeyError:
            result = fetch(url)
            self.serialized_files_cache[url] = result
        return result

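    # Example sketch (the URL is an assumption):
    #
    #     schema = tree.fetch_file("https://example.com/schema.json")
    #     # repeated calls with the same URL reuse the cached result
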
    def edit_serialized_file(self, fpath: str, new_content: t.Any) -> bool:
        """Edit a file in the cache.

        Args:
            fpath (str): Path to the file to edit.
            new_content (object): New content for the file.

        Returns:
            bool: ``True`` if the file content has changed,
            ``False`` otherwise.
        """
        fpath, serializer_name = guess_preferred_serializer(fpath)
        normalized_fpath = self.normalize_path(fpath)

        previous_content_string = self.get_file_content(fpath)
        self.serialized_files_cache[normalized_fpath] = new_content
        new_content_string = deserialize_for_url(
            fpath,
            new_content,
            prefer_serializer=serializer_name,
        )
        self.files_cache[normalized_fpath] = (False, new_content_string)

        if previous_content_string != new_content_string:
            with open(fpath, "w", encoding="utf-8") as f:
                f.write(new_content_string)
            return True
        return False
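
# Example sketch of an end-to-end edit flow (paths, keys and values are
# assumptions for illustration):
#
#     tree = Tree(rootdir="/path/to/project")
#     fpath, config = tree.serialize_file("pyproject.toml")
#     config.setdefault("tool", {})["example-section"] = {"enabled": True}
#     changed = tree.edit_serialized_file("pyproject.toml", config)
#     # ``changed`` is True when the re-serialized content differs from
#     # the previous one and the file on disk has been rewritten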