Source code for project_config.serializers

"""Object serializers."""

from __future__ import annotations

import functools
import importlib
import os
import sys
import typing as t
import urllib.parse

from identify import identify

from project_config.compat import NotRequired, Protocol, TypeAlias, TypedDict
from project_config.exceptions import ProjectConfigException


SerializerResult = t.Any


[docs]class SerializerFunction(Protocol): """Typecheck protocol for function resolved by serialization factory.""" def __call__( # noqa: D102 self, value: t.Any, **kwargs: t.Any, ) -> SerializerResult: # pragma: no cover ...
[docs]class SerializerError(ProjectConfigException): """Error happened serializing content as JSON."""
SerializerFunctionKwargs: TypeAlias = t.Dict[str, t.Any]
[docs]class SerializerDefinitionType(TypedDict): """Serializer definition type.""" module: str function: NotRequired[str] function_kwargs_from_url_path: NotRequired[ t.Callable[[str], SerializerFunctionKwargs] ]
SerializerDefinitionsType: TypeAlias = t.List[SerializerDefinitionType] serializers: t.Dict[ str, t.Tuple[SerializerDefinitionsType, SerializerDefinitionsType], ] = { ".json": ( [{"module": "project_config.serializers.json_"}], # loads [{"module": "project_config.serializers.json_"}], # dumps ), ".json5": ( [{"module": "pyjson5"}, {"module": "json5"}], [{"module": "pyjson5"}, {"module": "json5"}], ), ".yaml": ( [ { # Implementation notes: # # PyYaml is currently using the Yaml 1.1 specification, # which converts some words like `on` and `off` to `True` # and `False`. This leads to problems, for example, checking # `on.` objects in Github workflows. # # There is an issue open to track the progress to support # YAML 1.2 at https://github.com/yaml/pyyaml/issues/486 # # Comparison of v1.1 vs v1.2 at: # https://perlpunk.github.io/yaml-test-schema/schemas.html # # "module": "yaml", # "function": "load", # "function_kwargs": { # "Loader": { # "module": "yaml", # "object": "CSafeLoader", # "object_fallback": "SafeLoader", # }, # }, # # So we use ruamel.yaml, which supports v1.2 by default "module": "project_config.serializers.yaml", }, ], [{"module": "project_config.serializers.yaml"}], ), ".toml": ( [{"module": "project_config.serializers.toml"}], [{"module": "tomlkit"}], ), ".ini": ( [{"module": "project_config.serializers.ini"}], [{"module": "project_config.serializers.ini"}], ), ".editorconfig": ( [{"module": "project_config.serializers.editorconfig"}], [{"module": "project_config.serializers.editorconfig"}], ), ".py": ( [ { "module": "project_config.serializers.python", "function_kwargs_from_url_path": lambda path: { "namespace": {"__file__": path}, }, }, ], [{"module": "project_config.serializers.python"}], ), } serializers_fallback: t.Tuple[ SerializerDefinitionsType, SerializerDefinitionsType, ] = ( [{"module": "project_config.serializers.text"}], [{"module": "project_config.serializers.text"}], )
[docs]def _identify_serializer(filename: str) -> str: tag: t.Optional[str] = None for tag_ in identify.tags_from_filename(filename): if f".{tag_}" in serializers: tag = tag_ break return tag if tag is not None else "text"
[docs]def _get_serializer( url: str, prefer_serializer: t.Optional[str] = None, loader_function_name: str = "loads", ) -> SerializerFunction: url_parts = urllib.parse.urlsplit(url) if prefer_serializer is not None: if f".{prefer_serializer}" in serializers: serializer = serializers[f".{prefer_serializer}"] elif f".{prefer_serializer}" == ".text": serializer = serializers_fallback else: raise SerializerError( _file_can_not_be_serialized_as_object_error( url, ( f"\nPreferred serializer '{prefer_serializer}'" " not supported" ), ), ) else: ext = os.path.splitext(url_parts.path)[-1] try: serializer = serializers[ext] except KeyError: # try to guess the file type with identify serializer_name = _identify_serializer( os.path.basename(url_parts.path), ) if serializer_name == "text": serializer = serializers_fallback elif f".{serializer_name}" in serializers: serializer = serializers[f".{serializer_name}"] else: raise SerializerError( _file_can_not_be_serialized_as_object_error( url, ( f"\nSerializer detected as '{serializer_name}'" " not supported" ), ), ) serializer = serializer[ # type: ignore 0 if loader_function_name == "loads" else 1 ] # prepare serializer function serializer_definition, module = None, None for i, serializer_def in enumerate(serializer): try: module = importlib.import_module( serializer_def["module"], # type: ignore ) except ImportError: # pragma: no cover # if module for implementation is not importable, try next maybe if i > len(serializer) - 1: raise else: serializer_definition = serializer_def break loader_function: SerializerFunction = getattr( module, serializer_definition.get( # type: ignore "function", loader_function_name, ), ) function_kwargs: SerializerFunctionKwargs = {} """ if "function_kwargs" in serializer: function_kwargs = {} for kwarg_name, kwarg_values in serializer[ "function_kwargs" ].items(): mod = importlib.import_module(kwarg_values["module"]) try: obj = getattr(mod, kwarg_values["object"]) except AttributeError: # fallback object, as with pyyaml use CSafeLoader instead # of SafeLoader if libyyaml bindings are available if "fallback_object" in kwarg_values: obj = getattr(mod, kwarg_values["object"]) else: raise function_kwargs[kwarg_name] = obj """ if "function_kwargs_from_url_path" in serializer_definition: # type: ignore function_kwargs.update( serializer_definition[ # type: ignore "function_kwargs_from_url_path" ]( os.path.basename(url_parts.path), ), ) return functools.partial(loader_function, **function_kwargs)
[docs]def guess_preferred_serializer(url: str) -> t.Tuple[str, t.Optional[str]]: """Guess preferred serializer for URL. Args: url (str): URL to guess serializer for. Returns: str: Preferred serializer. """ try: # forcing new serializer to get content url, serializer_name = url.rsplit("?", maxsplit=1) except ValueError: url_parts = urllib.parse.urlsplit(url) ext = os.path.splitext(url_parts.path)[-1].lstrip(".") if f".{ext}" in serializers: return url, ext return url, _identify_serializer(os.path.basename(url_parts.path)) else: return url, serializer_name
[docs]def _file_can_not_be_serialized_as_object_error( url: str, error_message: str, ) -> str: return f"'{url}' can't be serialized as a valid object:{error_message}"
[docs]def deserialize_for_url( url: str, content: t.Any, prefer_serializer: t.Optional[str] = None, ) -> t.Any: """Deserialize content for URL. Args: url (str): URL to deserialize content for. content (Any): Content to deserialize. prefer_serializer (str): Preferred serializer. Returns: str: Deserialized content. """ return _get_serializer( url, prefer_serializer=prefer_serializer, loader_function_name="dumps", )(content)
[docs]def serialize_for_url( url: str, string: str, prefer_serializer: t.Optional[str] = None, ) -> SerializerResult: """Serializes to JSON a string according to the given URI. Args: url (str): URI of the file, used to detect the type of the file, either using the extension or through `identify`_. string (str): File content to serialize. Returns: dict: Result of the object serialization. .. _identify: https://github.com/pre-commit/identify """ try: # serialize result = _get_serializer(url, prefer_serializer=prefer_serializer)( string, ) except Exception: # handle exceptions in third party packages without importing them exc_class, exc, _ = sys.exc_info() package_name = exc_class.__module__.split(".")[0] if package_name in ( # Examples: "json", # json.serializer.JSONDecodeError "pyjson5", # pyjson5.Json5IllegalCharacter "tomli", # tomli.TOMLDecodeError "tomlkit", # tomlkit.exceptions.UnexpectedEofError ): raise SerializerError( _file_can_not_be_serialized_as_object_error( url, f" {exc.args[0]}", # type: ignore ), ) elif package_name == "ruamel": # Example: ruamel.yaml.scanner.ScannerError raise SerializerError( _file_can_not_be_serialized_as_object_error( url, f"\n{str(exc)}", ), ) raise # pragma: no cover return result
[docs]def build_empty_file_for_serializer(serializer_name: str) -> str: """Build empty file for serializer. Args: serializer_name (str): Serializer name. Returns: str: Empty file for serializer. """ return "{}" if serializer_name == "json" else ""