"""Object serializers."""
from __future__ import annotations
import functools
import importlib
import os
import sys
import urllib.parse
from collections.abc import Callable
from typing import TYPE_CHECKING, Any
from identify import identify
from project_config.exceptions import ProjectConfigException
[docs]class SerializerError(ProjectConfigException):
"""Error happened serializing content as JSON."""
if TYPE_CHECKING:
from project_config.compat import (
NotRequired,
Protocol,
TypeAlias,
TypedDict,
)
class SerializerFunction(Protocol):
"""Typecheck protocol for function resolved by serialization factory."""
def __call__( # noqa: D102
self,
_value: Any,
**_kwargs: Any,
) -> Any: ...
SerializerFunctionKwargs: TypeAlias = dict[str, Any]
class SerializerDefinitionType(TypedDict):
"""Serializer definition type."""
module: str
function: NotRequired[str]
function_kwargs_from_url_path: NotRequired[
Callable[[str], SerializerFunctionKwargs]
]
SerializerDefinitionsType: TypeAlias = list[SerializerDefinitionType]
serializers: dict[
str,
tuple[SerializerDefinitionsType, SerializerDefinitionsType],
] = {
".json": (
[{"module": "json"}], # loads
[{"module": "project_config.serializers.json"}], # dumps
),
".json5": (
[{"module": "pyjson5"}, {"module": "json5"}],
[{"module": "pyjson5"}, {"module": "json5"}],
),
".yaml": (
[
{
# Implementation notes:
#
# PyYaml is currently using the Yaml 1.1 specification,
# which converts some words like `on` and `off` to `True`
# and `False`. This leads to problems, for example, checking
# `on.` objects in Github workflows.
#
# There is an issue open to track the progress to support
# YAML 1.2 at https://github.com/yaml/pyyaml/issues/486
#
# Comparison of v1.1 vs v1.2 at:
# https://perlpunk.github.io/yaml-test-schema/schemas.html
#
# },
# },
#
# So we use ruamel.yaml, which supports v1.2 by default
"module": "project_config.serializers.yaml",
},
],
[{"module": "project_config.serializers.yaml"}],
),
".toml": (
[{"module": "project_config.serializers.toml"}],
[{"module": "tomlkit"}],
),
".ini": (
[{"module": "project_config.serializers.ini"}],
[{"module": "project_config.serializers.ini"}],
),
".editorconfig": (
[{"module": "project_config.serializers.editorconfig"}],
[{"module": "project_config.serializers.editorconfig"}],
),
".py": (
[
{
"module": "project_config.serializers.python",
"function_kwargs_from_url_path": lambda path: {
"namespace": {"__file__": path},
},
},
],
[{"module": "project_config.serializers.python"}],
),
}
serializers_fallback: tuple[
SerializerDefinitionsType,
SerializerDefinitionsType,
] = (
[{"module": "project_config.serializers.text"}],
[{"module": "project_config.serializers.text"}],
)
EMPTY_CONTENT_BY_SERIALIZER = {
"json": "{}",
"json5": "{}",
}
SERIALIZER_FROM_EXT_FILENAME = {
".yaml": {
".pre-commit-config.yaml": (
[{"module": "project_config.serializers.yaml"}],
[{"module": "project_config.serializers.contrib.pre_commit"}],
),
},
}
[docs]def _identify_serializer(filename: str) -> str:
tag: str | None = None
for identified_tag in identify.tags_from_filename(filename):
if f".{identified_tag}" in serializers:
tag = identified_tag
break
return tag if tag is not None else "text"
[docs]def guess_serializer_for_path(
path: str,
) -> tuple[Any, Any]:
"""Guess serializer for a path.
Args:
path (str): Path to guess serializer for.
"""
ext = os.path.splitext(path)[-1]
if ext in SERIALIZER_FROM_EXT_FILENAME:
filename = os.path.basename(path)
if filename in SERIALIZER_FROM_EXT_FILENAME[ext]:
return SERIALIZER_FROM_EXT_FILENAME[ext][filename], None
try:
return serializers[ext], None
except Exception:
# try to guess the file type with identify
serializer_name = _identify_serializer(
os.path.basename(path),
)
if f".{serializer_name}" in serializers:
return serializers[f".{serializer_name}"], None
if serializer_name == "text": # pragma: no branch
return serializers_fallback, None
return None, serializer_name
[docs]def _get_serializer_function( # noqa: PLR0912
url: str,
prefer_serializer: str | None = None,
loader_function_name: str = "loads",
) -> SerializerFunction:
url_parts = urllib.parse.urlsplit(url)
serializer = None
if prefer_serializer is not None:
if f".{prefer_serializer}" in serializers:
serializer = serializers[f".{prefer_serializer}"]
elif f".{prefer_serializer}" == ".text":
serializer = serializers_fallback
else:
raise SerializerError(
_file_can_not_be_serialized_as_object_error(
url,
(
f"\nPreferred serializer '{prefer_serializer}'"
" not supported"
),
),
)
else:
serializer, serializer_name = guess_serializer_for_path(url_parts.path)
if serializer is None: # pragma: no cover
raise SerializerError(
_file_can_not_be_serialized_as_object_error(
url,
(
f"\nSerializer detected as '{serializer_name}'"
" not supported"
),
),
) from None
serializer = serializer[0 if loader_function_name == "loads" else 1] # type: ignore
# prepare serializer function
serializer_definition, module = None, None
for i, serializer_def in enumerate(serializer):
try:
module = importlib.import_module(
serializer_def["module"], # type: ignore
)
except ImportError: # pragma: no cover
# if module for implementation is not importable, try next maybe
if i > len(serializer) - 1:
raise
else:
serializer_definition = serializer_def
break
if serializer_definition is None: # pragma: no cover
raise SerializerError(
_file_can_not_be_serialized_as_object_error(
url,
(
f"\nSerializer for url '{url}' can't be located,"
" surely because the library to handle it is"
" not installed."
),
),
)
loader_function: SerializerFunction = getattr(
module,
serializer_definition.get( # type: ignore
"function",
loader_function_name,
),
)
function_kwargs: SerializerFunctionKwargs = {}
"""
if "function_kwargs" in serializer:
function_kwargs = {}
for kwarg_name, kwarg_values in serializer[
"function_kwargs"
].items():
mod = importlib.import_module(kwarg_values["module"])
try:
obj = getattr(mod, kwarg_values["object"])
except AttributeError:
# fallback object, as with pyyaml use CSafeLoader instead
# of SafeLoader if libyyaml bindings are available
if "fallback_object" in kwarg_values:
obj = getattr(mod, kwarg_values["object"])
else:
raise
function_kwargs[kwarg_name] = obj
"""
if "function_kwargs_from_url_path" in serializer_definition: # type: ignore
function_kwargs.update(
serializer_definition["function_kwargs_from_url_path"]( # type: ignore
os.path.basename(url_parts.path),
),
)
return functools.partial(loader_function, **function_kwargs)
[docs]def guess_preferred_serializer(url: str) -> tuple[str, str]:
"""Guess preferred serializer for URL.
Args:
url (str): URL to guess serializer for.
Returns:
tuple: Filename and serializer.
"""
try:
url, serializer_name = url.rsplit("?", maxsplit=1)
except ValueError:
url_parts = urllib.parse.urlsplit(url)
ext = os.path.splitext(url_parts.path)[-1].lstrip(".")
if f".{ext}" in serializers:
return url, ext
return url, _identify_serializer(os.path.basename(url_parts.path))
else:
return url, serializer_name
[docs]def _file_can_not_be_serialized_as_object_error(
url: str,
error_message: str,
) -> str:
return f"'{url}' can't be serialized as a valid object:{error_message}"
[docs]def deserialize_for_url(
url: str,
content: Any,
prefer_serializer: str | None = None,
) -> Any:
"""Deserialize content for URL.
Args:
url (str): URL to deserialize content for.
content (Any): Content to deserialize.
prefer_serializer (str): Preferred serializer.
Returns:
str: Deserialized content.
"""
return _get_serializer_function(
url,
prefer_serializer=prefer_serializer,
loader_function_name="dumps",
)(content)
[docs]def serialize_for_url(
url: str,
string: str,
prefer_serializer: str | None = None,
) -> Any:
"""Serializes to JSON a string according to the given URI.
Args:
url (str): URI of the file, used to detect the type of the file,
either using the extension or through `identify`_.
string (str): File content to serialize.
prefer_serializer (str): Preferred serializer.
Returns:
dict: Result of the object serialization.
.. _identify: https://github.com/pre-commit/identify
"""
try:
# serialize
result = _get_serializer_function(
url,
prefer_serializer=prefer_serializer,
)(
string,
)
except Exception:
# handle exceptions in third party packages without importing them
exc_class, exc, _ = sys.exc_info()
package_name = exc_class.__module__.split(".")[0]
if package_name in ( # Examples:
"json", # json.serializer.JSONDecodeError
"pyjson5", # pyjson5.Json5IllegalCharacter
"tomli", # tomli.TOMLDecodeError
"tomlkit", # tomlkit.exceptions.UnexpectedEofError
):
raise SerializerError(
_file_can_not_be_serialized_as_object_error(
url,
f" {exc.args[0]}", # type: ignore
),
) from None
if package_name == "ruamel":
raise SerializerError(
_file_can_not_be_serialized_as_object_error(
url,
f"\n{str(exc)}",
),
) from None
raise # pragma: no cover
return result