Skip to content

Commit

Permalink
enhancement: add new backend to load and dump python code as data
Browse files Browse the repository at this point in the history
add new backend loader and dumper to load from and dump to python code.

- src/anyconfig/backend/python: new backend implementation
- src/anyconfig/backend/__init__.py: enable new backend module
- tests/: test cases and data for new backend module
  • Loading branch information
ssato committed Jan 14, 2024
1 parent b036e66 commit 7e87a6d
Show file tree
Hide file tree
Showing 76 changed files with 482 additions and 2 deletions.
5 changes: 3 additions & 2 deletions src/anyconfig/backend/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
json,
pickle,
properties,
python,
sh,
toml,
yaml,
Expand All @@ -23,8 +24,8 @@


PARSERS: ParserClssT = [
*ini.PARSERS, *pickle.PARSERS, *properties.PARSERS,
*sh.PARSERS, *xml.PARSERS, *json.PARSERS
*ini.PARSERS, *json.PARSERS, *pickle.PARSERS, *properties.PARSERS,
*python.PARSERS, *sh.PARSERS, *xml.PARSERS,
]


Expand Down
17 changes: 17 additions & 0 deletions src/anyconfig/backend/python/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#
# Copyright (C) 2023, 2024 Satoru SATOH <satoru.satoh @ gmail.com>
# SPDX-License-Identifier: MIT
#
"""Backend modules to load and dump python code holding data.
- python.builtin: builtin parser [default]
Changelog:
.. versionadded:: 0.14.0
"""
from . import builtin
from ..base import ParserClssT


# Parser classes provided by this backend package; currently only the builtin
# parser is listed.
PARSERS: ParserClssT = [builtin.Parser]
43 changes: 43 additions & 0 deletions src/anyconfig/backend/python/builtin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#
# Copyright (C) 2023, 2024 Satoru SATOH <satoru.satoh @ gmail.com>
# SPDX-License-Identifier: MIT
#
r"""A backend module to load and dump python code containing data.
- Format to support: Python code
- Requirements: None (built-in)
- Development Status :: 3 - Alpha
- Limitations:
- This module will load data as it is. In other words, some options like
ac_dict and ac_ordered do not have any effect at all.
- Only some primitive data expressions are supported
- It might have some vulnerabilities to DoS and arbitrary code execution
(ACE) attacks
- It's very simple, so it may be difficult to dump complex data using this
- Special options:
- allow_exec: bool [False]: Allow execution of the input python code when
loading input files. It may cause vulnerabilities to arbitrary code execution
(ACE) attacks, so you should set it to True only if you are sure that the
inputs are safe and come from reliable sources.
Changelog:
.. versionadded:: 0.14.0
- Added builtin data loader from python code
"""
from .. import base
from . import (
loader, dumper
)


class Parser(base.Parser, loader.Loader, dumper.Dumper):
    """Parser to load python code as data and to dump data as python code.

    This class defines only the identifying attributes below; everything
    else is inherited from its base classes.
    """

    _cid = "python.builtin"
    _type = "python"
    _extensions = ["py"]
39 changes: 39 additions & 0 deletions src/anyconfig/backend/python/dumper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#
# Copyright (C) 2024 Satoru SATOH <satoru.satoh @ gmail.com>
# SPDX-License-Identifier: MIT
#
r"""A backend module to dump python code containing data.
- Format to support: Python code
- Requirements: None (built-in)
- Development Status :: 3 - Alpha
- Limitations:
- This implementation is very simple, so it may be difficult to dump
complex data using this.
- Special options: None
Changelog:
.. versionadded:: 0.14.0
- Added builtin data dumper from python code
"""
from ..base import (
InDataExT, ToStringDumperMixin
)


class Dumper(ToStringDumperMixin):
    """Dumper to serialize objects as python code."""

    def dump_to_string(self, cnf: InDataExT, **kwargs) -> str:
        """Render the config data `cnf` as a string of python code.

        :param cnf: Configuration data to dump
        :param kwargs: optional keyword parameters; not used here
        :return: the string that ``repr(cnf)`` gives
        """
        text = repr(cnf)
        return text
115 changes: 115 additions & 0 deletions src/anyconfig/backend/python/loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
#
# Copyright (C) 2023, 2024 Satoru SATOH <satoru.satoh @ gmail.com>
# SPDX-License-Identifier: MIT
#
r"""A backend module to load python code containing data.
- Format to support: Python code
- Requirements: None (built-in)
- Development Status :: 3 - Alpha
- Limitations:
- This module will load data as it is. In other words, some options like
ac_dict and ac_ordered do not have any effect at all.
- Only some primitive data expressions are supported
- It might have some vulnerabilities to DoS and arbitrary code execution
(ACE) attacks
- Special options:
- allow_exec: bool [False]: Allow execution of the input python code when
loading input files. It may cause vulnerabilities to arbitrary code execution
(ACE) attacks, so you should set it to True only if you are sure that the
inputs are safe and come from reliable sources.
Changelog:
.. versionadded:: 0.14.0
- Added builtin data loader from python code
"""
import pathlib
import tempfile
import typing

from ... import ioinfo
from ..base import (
IoiT, InDataExT, LoaderMixin
)

from . import utils


def load_from_temp_file(
    content: str, **opts: typing.Any
) -> InDataExT:
    """Dump `content` to a temporary file and load data from it.

    :param content: A str to load data from
    :param opts:
        Keyword options; only 'allow_exec' (False if not given) is used and
        it is passed to utils.load_from_path
    :return: The data utils.load_from_path loaded from the temporary file
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        path = pathlib.Path(tmpdir) / "mod.py"
        path.write_text(content, encoding='utf-8')

        # Load the data before leaving the 'with' block because the temporary
        # directory and the file in it are removed on exit.
        return utils.load_from_path(
            path, allow_exec=opts.get("allow_exec", False)
        )


class Loader(LoaderMixin):
    """Loader for python code files."""

    # NOTE(review): presumably tells the base class that loaded data may be
    # primitives rather than mappings -- confirm against LoaderMixin.
    _allow_primitives: bool = True
    # Names of the options this loader accepts on load.
    _load_opts = ["allow_exec"]

    def loads(self, content: str, **options) -> InDataExT:
        """Load config from given string 'content' after some checks.

        :param content: Config file content
        :param options:
            It will be ignored at all except for 'allow_exec' option to allow
            execution of the code
        :return:
            dict or dict-like object holding input data or primitives
        """
        allow_exec = options.get("allow_exec", False)

        # The code is executed only if it's explicitly allowed and the content
        # mentions the name of the variable expected to hold the data.
        if allow_exec and content and utils.DATA_VAR_NAME in content:
            return load_from_temp_file(content, allow_exec=allow_exec)

        return utils.load_literal_data_from_string(content)

    def load(self, ioi: IoiT, ac_ignore_missing: bool = False,
             **options) -> InDataExT:
        """Load config from ``ioi``.

        :param ioi:
            'anyconfig.ioinfo.IOInfo' namedtuple object provides various info
            of input object to load data from
        :param ac_ignore_missing:
            Ignore and just return empty result if given `ioi` object does not
            exist in actual.
        :param options:
            options will be passed to backend specific loading functions.
            please note that options have to be sanitized w/
            :func:`anyconfig.utils.filter_options` later to filter out options
            not in _load_opts.
        :return: dict or dict-like object holding configurations
        """
        allow_exec = options.get("allow_exec", False)

        if not ioi:
            return {}

        if ioinfo.is_stream(ioi):
            return load_from_temp_file(
                typing.cast(typing.IO, ioi.src).read(),
                allow_exec=allow_exec
            )

        path = pathlib.Path(ioi.path)

        # Honor ac_ignore_missing as documented: give an empty result instead
        # of failing on a missing file.
        if ac_ignore_missing and not path.exists():
            return {}

        return utils.load_from_path(path, allow_exec=allow_exec)
89 changes: 89 additions & 0 deletions src/anyconfig/backend/python/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#
# Copyright (C) 2023, 2024 Satoru SATOH <satoru.satoh @ gmail.com>
# SPDX-License-Identifier: MIT
#
# pylint: disable=missing-docstring
r"""Load data from .py.
.. warning::
- Both load_literal_data_from_string and load_literal_data_from_path only
parse and never execute the given string containing the code, so they
are not vulnerable to arbitrary code execution (ACE) exploits. However, DoS
attacks should still be possible.
- load_data_from_py is vulnerable because it executes the code. You
must avoid loading .py data from unknown sources with it.
"""
import ast
import importlib
import importlib.abc
import importlib.util
import pathlib
import typing
import warnings


# Default name of the module-level variable holding the data in python code to
# execute; loaders also search the content for it to choose how to load.
DATA_VAR_NAME: str = "DATA"


def load_literal_data_from_string(content: str) -> typing.Any:
    """Parse the literal data expression ``content`` without executing it.

    :param content: A str holding a python literal expression
    :return: The data :func:`ast.literal_eval` built from ``content``
    """
    data = ast.literal_eval(content)
    return data


def load_literal_data_from_path(path: pathlib.Path) -> typing.Any:
    """Load data expressed by literal data from .py files.

    The stripped content of the file is passed to
    load_literal_data_from_string.

    :param path: A path to the .py file holding a literal expression
    :return: The data parsed from the content of the file
    """
    # Read as UTF-8 explicitly, the default encoding of python source code,
    # instead of depending on the locale's preferred encoding.
    return load_literal_data_from_string(
        path.read_text(encoding="utf-8").strip()
    )


def load_data_from_py(
    path: pathlib.Path,
    data_name: typing.Optional[str] = None,
    fallback: bool = False
) -> typing.Any:
    """Load data from .py files by executing it as a module.

    .. note:: It's not safe and has vulnerabilities for ACE attacks.

    :param path: A path to the .py file to execute
    :param data_name:
        The name of the module-level variable holding the data;
        DATA_VAR_NAME is used if it's None
    :param fallback:
        Return None instead of raising ValueError if no data was loaded
    :return: The value of the variable `data_name` in the executed module
    :raises ValueError: if no data was loaded and `fallback` is False
    """
    if data_name is None:
        data_name = DATA_VAR_NAME

    spec = importlib.util.spec_from_file_location('testmod', str(path))
    if spec and isinstance(spec.loader, importlib.abc.Loader):
        mod = importlib.util.module_from_spec(spec)
        # This executes the code in the file; see the note above.
        spec.loader.exec_module(mod)
        try:
            return getattr(mod, data_name)
        except (TypeError, ValueError, AttributeError):
            # Only warn here; the code after this block decides whether to
            # fall back to None or raise.
            warnings.warn(
                f'No valid data "{data_name}" was found in {mod!r}.',
                stacklevel=2
            )

    if fallback:
        return None

    raise ValueError(f"Failed to load data from: {path!r}")


def load_from_path(
    path: pathlib.Path,
    allow_exec: bool = False,
    data_name: typing.Optional[str] = None,
    fallback: bool = False
) -> typing.Any:
    """Load data from given path `path`.

    It will choose the appropriate function by the keyword, `data_name`, in the
    content of the file.

    :param path: A path to the .py file to load data from
    :param allow_exec: The code will be executed if True
    :param data_name:
        The name of the variable holding the data to look for in the content;
        DATA_VAR_NAME is used if it's not given
    :param fallback: Passed to load_data_from_py if the code is executed
    :return: The data loaded from the file
    """
    if allow_exec:
        keyword = data_name or DATA_VAR_NAME
        # Read as UTF-8 explicitly, the default encoding of python source
        # code, instead of depending on the locale's preferred encoding.
        if keyword in path.read_text(encoding="utf-8"):
            return load_data_from_py(
                path, data_name=data_name, fallback=fallback
            )

    return load_literal_data_from_path(path)
Empty file.
1 change: 1 addition & 0 deletions tests/backend/dumpers/python/test_python_builtin.py
Empty file.
1 change: 1 addition & 0 deletions tests/backend/loaders/python/test_python_builtin.py
Loading

0 comments on commit 7e87a6d

Please sign in to comment.