diff --git a/.github/workflows/Tests.yaml b/.github/workflows/Tests.yaml index 11f6afc..f9c28a2 100644 --- a/.github/workflows/Tests.yaml +++ b/.github/workflows/Tests.yaml @@ -10,7 +10,7 @@ jobs: Test: strategy: matrix: - python-version: ["3.5", "3.6", "3.7","3.8", "3.9", "3.10.0-alpha.4", "pypy3"] + python-version: ["3.7", "3.8", "3.9", "3.10.0-alpha.4", "pypy-3.7"] os: [ubuntu-latest, windows-latest] runs-on: ${{ matrix.os }} @@ -50,34 +50,29 @@ jobs: uses: codecov/codecov-action@v1 with: flags: ${{ runner.os }} + + Typecheck: + runs-on: ubuntu-latest + steps: + # setup + - name: Checkout Repository + uses: actions/checkout@v2 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: "3.9" + # dependencies + - name: Restore pip cache + uses: actions/cache@v2 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('setup.cfg') }} + + - name: Install test dependencies + run: python -m pip --cache-dir ~/.cache/pip install --quiet mypy -# Typecheck: # disabled until we have type hints -# runs-on: ubuntu-latest -# strategy: -# matrix: -# python-version: ["3.5", "3.6", "3.7", "3.8", "3.9", "3.10.0-alpha.4", "pypy3"] -# -# steps: -# # setup -# - name: Checkout Repository -# uses: actions/checkout@v2 -# -# - name: Set up Python ${{ matrix.python-version }} -# uses: actions/setup-python@v2 -# with: -# python-version: ${{ matrix.python-version }} -# -# # dependencies -# - name: Restore pip cache -# uses: actions/cache@v2 -# with: -# path: ~/.cache/pip -# key: ${{ runner.os }}-pip-${{ hashFiles('setup.cfg') }} -# -# - name: Install lint dependencies -# run: python -m pip --cache-dir ~/.cache/pip install --quiet mypy -# -# # lint -# - name: run MyPy -# run: python -m mypy --config-file setupg.cfg . 
+ # check + - name: run MyPy + run: python -m mypy --config-file setup.cfg -p pyhp diff --git a/.gitignore b/.gitignore index 33111af..3448507 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ */__pycache__ +*.pyc +.coverage +.htmlcov dist -notes.txt -Test.html - +**/.vscode/** diff --git a/README.md b/README.md index db5d6de..ad256c3 100644 --- a/README.md +++ b/README.md @@ -44,10 +44,10 @@ The script is called either by the configuration of the web server or a shebang initialization parameters and provides the following methods: - `is_available`, wich returns a boolean indicating if the handler can be used - `is_outdated`, wich returns a boolean indicating if the cache needs to be renewed - - `save`, wich takes an iterator as argument and saves it in the cache - - `load`, wich loads an iterator from the cache - - `close`, wich does cleanup tasks - - note that the iterator may contain code objects which can't be pickled + - `save`, wich takes an code object as an argument and saves it in the cache + - `load`, wich loads an code object from the cache + - `close`, wich does cleanup tasks and gets called when used as a context manager + - note that code objects have to support the pickle protocol - examples are available in the *cache_handlers* directory ## Installation diff --git a/cache_handlers/files_mtime.py b/cache_handlers/files_mtime.py index 502ba23..a70c120 100644 --- a/cache_handlers/files_mtime.py +++ b/cache_handlers/files_mtime.py @@ -2,7 +2,7 @@ """PyHP cache handler (files with modification time)""" -import marshal # not pickle because only marshal supports code objects +import pickle import os.path from os import makedirs from time import time @@ -16,6 +16,12 @@ def __init__(self, cache_path, file_path, config): self.ttl = config.getint("ttl") self.max_size = config.getint("max_size") + def __enter__(self): + return self + + def __exit__(self, type, value, traceback): + self.close() + def get_cachedir_size(self): # get size of cache directory 
def save(self, code):
    """Pickle *code* into the cache file at ``self.cache_path``.

    Missing parent directories are created first. Directories that already
    exist (for example because another process created them in the meantime)
    are accepted silently.

    Args:
        code: the object to store, it has to support the pickle protocol.

    Raises:
        OSError: if a directory can not be created or the file can not be opened.
        pickle.PicklingError: if *code* can not be pickled.
    """
    cache_dir = os.path.dirname(self.cache_path)
    # a bare file name has no directory part and makedirs("") would raise;
    # exist_ok=True already covers the "directory exists" case, so no
    # separate (race-prone) isdir() check is needed
    if cache_dir:
        makedirs(cache_dir, exist_ok=True)
    with open(self.cache_path, "wb") as cache:
        pickle.dump(code, cache)
# parse the command line; argparse has already opened the script file (or picked stdin)
args = argparser.parse_args()

# run the interpreter and report its return value to the shell as exit status
exit_code = main(args.file, args.caching, args.config)
sys.exit(exit_code)
~/.cache/pyhp # path to handler handler_path = /lib/pyhp/cache_handlers/files_mtime.py - -[sessions] -enable = True -auto_start = False - -# path argument for handler -path = ~/.pyhp/sessions - -# session handler + directory containing the session handler -handler = files -handler_path = /lib/pyhp/session_handlers - -# lenght of the session id -sid_length = 32 - -# how to serialize/unserialize session data, pickle or json -serialize_handler = pickle - -# config for session cookie -name = PyHPSESSID -cookie_lifetime = 0 -cookie_path = / -cookie_domain = -cookie_secure = True -cookie_httponly = False -cookie_samesite = - -# probability/divisor = probability for carrying out a garbage collection at startup -gc_probability = 1 -gc_divisor = 100 - -# max lifetime of session since last use -gc_maxlifetime = 1440 - -# write only if data has changed -lazy_write = True diff --git a/pyhp/__init__.py b/pyhp/__init__.py index 58883e1..6beb14a 100644 --- a/pyhp/__init__.py +++ b/pyhp/__init__.py @@ -13,14 +13,15 @@ # package metadata # needs to be defined before .main is imported -__version__ = "2.0" +__version__ = "2.1" __author__ = "Eric Wolf" __maintainer__ = "Eric Wolf" -__license__ = "MIT" +__license__ = "GPLv3" __email__ = "robo-eric@gmx.de" # please dont use for spam :( __contact__ = "https://github.com/Deric-W/PyHP" -# import all submodules -from . import embed -from . import libpyhp -from . 
# evaluate the cli arguments of "python -m pyhp"
args = argparser.parse_args()

# positional order expected by main(): script stream, caching flag, config path
main_arguments = (args.file, args.caching, args.config)

# execute main and exit with its return value
sys.exit(main(*main_arguments))
B = TypeVar("B", bound="CodeBuilder")


class Code(metaclass=ABCMeta):
    """interface of the executable objects produced by a code builder"""
    __slots__ = ()

    @abstractmethod
    def __eq__(self, other: object) -> bool:
        """compare with another object, has to be provided by every implementation"""
        raise NotImplementedError

    @abstractmethod
    def execute(self, variables: Dict[str, Any]) -> Iterator[str]:
        """run the code with the given namespace and yield the text sections found between code sections"""
        raise NotImplementedError


class CodeBuilder(metaclass=ABCMeta):
    """interface of the objects collecting sections and turning them into a code object"""
    __slots__ = ()

    def __deepcopy__(self, memo: MutableMapping[int, Any]) -> CodeBuilder:
        """implement copy.deepcopy() through copy() and register the result in the memo"""
        clone = memo[id(self)] = self.copy()
        return clone

    def add_code(self, code: str, offset: int) -> None:
        """receive a code section and its line offset, does nothing unless overridden"""

    def add_text(self, text: str, offset: int) -> None:
        """receive a text section and its line offset, does nothing unless overridden"""

    @abstractmethod
    def code(self, spec: ModuleSpec) -> Code:
        """create the code object for the sections received so far"""
        raise NotImplementedError

    @abstractmethod
    def copy(self: B) -> B:
        """return a copy of the builder including its current state"""
        raise NotImplementedError


class CodeBuilderDecorator(CodeBuilder, Generic[B]):
    """base class for builders wrapping another builder, every call is forwarded by default"""
    __slots__ = ("builder",)

    builder: B

    def add_code(self, code: str, offset: int) -> None:
        """forward the code section to the wrapped builder"""
        self.builder.add_code(code, offset)

    def add_text(self, text: str, offset: int) -> None:
        """forward the text section to the wrapped builder"""
        self.builder.add_text(text, offset)

    def code(self, spec: ModuleSpec) -> Code:
        """let the wrapped builder create the code object"""
        return self.builder.code(spec)

    def detach(self) -> B:
        """hand out the wrapped builder, the decorator should not be used afterwards"""
        return self.builder


class Parser(metaclass=ABCMeta):
    """interface of the objects splitting source code into text and code sections"""
    __slots__ = ()

    @abstractmethod
    def parse(self, source: str, line_offset: int = 0) -> Iterator[Tuple[str, int, bool]]:
        """split the source, yielding tuples of section, line offset and a flag which is true for code"""
        raise NotImplementedError

    def build(self, source: str, builder: CodeBuilder, line_offset: int = 0) -> None:
        """feed every section found by parse() into the matching method of the builder"""
        for section, offset, is_code in self.parse(source, line_offset):
            submit = builder.add_code if is_code else builder.add_text
            submit(section, offset)
class ByteCode(Code):
    """Code implementation backed by a single compiled code object"""
    __slots__ = ("code", "spec")

    code: CodeType

    spec: ModuleSpec

    def __init__(self, code: CodeType, spec: ModuleSpec) -> None:
        """store the compiled code together with the spec describing its module"""
        self.code, self.spec = code, spec

    def __getstate__(self) -> Tuple[bytes, ModuleSpec]:
        """return the state for pickling, the code object is serialized with marshal since pickle cant process it"""
        serialized = marshal.dumps(self.code)
        return serialized, self.spec

    def __setstate__(self, state: Tuple[bytes, ModuleSpec]) -> None:
        """restore the state created by __getstate__"""
        self.code = marshal.loads(state[0])
        self.spec = state[1]

    def __eq__(self, other: object) -> bool:
        """compare code and spec with another ByteCode, other types are left to the other operand"""
        if not isinstance(other, ByteCode):
            return NotImplemented
        return self.code == other.code and self.spec == other.spec

    def execute(self, variables: Dict[str, Any]) -> Iterator[str]:
        """fill the namespace with the module constants, run the code and return the iterator of text sections"""
        spec = self.spec
        # module constants derived from the spec
        variables["__spec__"] = spec
        variables["__name__"] = spec.name
        variables["__loader__"] = spec.loader
        variables["__file__"] = spec.origin
        variables["__path__"] = spec.submodule_search_locations
        variables["__cached__"] = spec.cached
        variables["__package__"] = spec.parent
        # executing the code stores the compiled function in the namespace
        exec(self.code, variables)
        generator_function = variables[""]
        return generator_function()
"optimization_level") + + nodes: List[ast.AST] + + has_text: bool + + optimization_level: int + + def __init__(self, optimization_level: int = -1) -> None: + """construct a instance with the optimization level to compile code sections""" + self.nodes = [] + self.has_text = False + self.optimization_level = optimization_level + + def add_code(self, code: str, offset: int) -> None: + """add a code section with a line offset""" + try: + nodes = [ + ast.increment_lineno(node, offset) for node in ast.parse(code, mode="exec").body + ] + except SyntaxError as e: # set correct lineno and reraise + if e.lineno is not None: + e.lineno += offset + raise + self.nodes.extend(nodes) + + def add_text(self, text: str, offset: int) -> None: # pylint: disable=W0613 + """add a text section with a line offset""" + if text: # ignore empty sections + self.nodes.append( + ast.Expr( + value=ast.Yield( + value=ast.Constant( + value=text, + lineno=offset + 1, + col_offset=0 + ), + lineno=offset + 1, + col_offset=0 + ), + lineno=offset + 1, + col_offset=0 + ) + ) + self.has_text = True + + def code(self, spec: ModuleSpec) -> ByteCode: + """build a code object from the received sections""" + if self.has_text: # result will be a generator + nodes = self.nodes + else: # result will be a function, add noop yield + nodes = self.nodes + GENERATOR_NOOP + code = compile( + ast.Module( + body=[ + ast.FunctionDef( + name="", + args=ast.arguments( + posonlyargs=[], + args=[], + kwonlyargs=[], + kw_defaults=[], + defaults=[] + ), + body=nodes, + decorator_list=[], + lineno=1, + col_offset=0 + ) + ], + type_ignores=[] + ), + "" if spec.origin is None else spec.origin, + "exec", + optimize=self.optimization_level, + dont_inherit=True + ) + return ByteCode(code, spec) + + def copy(self) -> ByteCodeBuilder: + """copy the builder with his current state""" + builder = self.__class__.__new__(self.__class__) + builder.nodes = self.nodes.copy() + builder.has_text = self.has_text + builder.optimization_level = 
class GenericCode(Code):
    """Code implementation storing text sections and compiled code sections in one sequence"""
    __slots__ = ("sections", "spec")

    sections: Sequence[Union[CodeType, str]]

    spec: ModuleSpec

    def __init__(self, sections: Sequence[Union[CodeType, str]], spec: ModuleSpec) -> None:
        """store the sections together with the spec describing their module"""
        self.sections, self.spec = sections, spec

    def __getstate__(self) -> Tuple[bytes, ModuleSpec]:
        """return the state for pickling, the sections are serialized with marshal since pickle cant process code objects"""
        serialized = marshal.dumps(self.sections)
        return serialized, self.spec

    def __setstate__(self, state: Tuple[bytes, ModuleSpec]) -> None:
        """restore the state created by __getstate__"""
        self.sections = marshal.loads(state[0])
        self.spec = state[1]

    def __eq__(self, other: object) -> bool:
        """compare sections and spec with another GenericCode, other types are left to the other operand"""
        if not isinstance(other, GenericCode):
            return NotImplemented
        return self.sections == other.sections and self.spec == other.spec

    def execute(self, variables: Dict[str, Any]) -> Iterator[str]:
        """fill the namespace with the module constants, then run code sections and yield text sections in order"""
        spec = self.spec
        # module constants derived from the spec
        variables["__spec__"] = spec
        variables["__name__"] = spec.name
        variables["__loader__"] = spec.loader
        variables["__file__"] = spec.origin
        variables["__path__"] = spec.submodule_search_locations
        variables["__cached__"] = spec.cached
        variables["__package__"] = spec.parent
        for section in self.sections:
            if not isinstance(section, CodeType):   # plain text, hand it to the caller
                yield section
                continue
            exec(section, variables)


class GenericCodeBuilder(CodeBuilder):
    """builder collecting parsed code sections and text sections for GenericCode"""
    __slots__ = ("sections", "optimization_level")

    sections: List[Union[ast.AST, str]]

    optimization_level: int

    def __init__(self, optimization_level: int = -1) -> None:
        """start without sections, the optimization level is passed to compile() later"""
        self.sections = []
        self.optimization_level = optimization_level

    def add_code(self, code: str, offset: int) -> None:
        """parse a code section and store its ast shifted by the line offset"""
        try:
            module = ast.parse(code, "", mode="exec")
        except SyntaxError as e:    # report the line number relative to the whole source
            if e.lineno is not None:
                e.lineno += offset
            raise
        self.sections.append(ast.increment_lineno(module, offset))

    def add_text(self, text: str, offset: int) -> None:  # pylint: disable=W0613
        """store a text section, empty text is dropped"""
        if not text:
            return
        self.sections.append(text)

    def compile_sections(self, name: str) -> Iterator[Union[CodeType, str]]:
        """yield the sections in order with every ast compiled under the given file name"""
        for section in self.sections:
            if not isinstance(section, ast.AST):    # text sections pass through untouched
                yield section
                continue
            yield compile(
                section,
                name,
                "exec",
                dont_inherit=True,
                optimize=self.optimization_level
            )

    def code(self, spec: ModuleSpec) -> GenericCode:
        """compile all sections, using the origin of the spec as file name if there is one"""
        name = "" if spec.origin is None else spec.origin
        return GenericCode(tuple(self.compile_sections(name)), spec)

    def copy(self) -> GenericCodeBuilder:
        """create a builder of the same class with a copy of the section list"""
        cls = self.__class__
        clone = cls.__new__(cls)
        clone.sections = self.sections.copy()
        clone.optimization_level = self.optimization_level
        return clone
class RegexParser(Parser):
    """parser locating the borders of code sections with two regular expressions"""
    __slots__ = ("start", "end")

    start: Pattern[str]

    end: Pattern[str]

    def __init__(self, start: Pattern[str], end: Pattern[str]) -> None:
        """store the patterns matching the start and the end of a code section"""
        self.start, self.end = start, end

    def parse(self, source: str, line_offset: int = 0) -> Iterator[Tuple[str, int, bool]]:
        """split the source at the patterns, yielding section, line offset and a flag which is true for code"""
        cursor = 0
        source_length = len(source)
        in_code = False     # the source starts with text, code only follows a start match
        while cursor < source_length:
            # inside code the end pattern is next, inside text the start pattern
            delimiter = self.end if in_code else self.start
            match = delimiter.search(source, cursor)
            if match is None:   # no delimiter left, the remaining source is the last section
                yield source[cursor:], line_offset, in_code
                return
            yield source[cursor:match.start()], line_offset, in_code
            # newlines of the section and of the delimiter itself shift the next section
            line_offset += source.count("\n", cursor, match.end())
            cursor = match.end()
            in_code = not in_code   # text and code alternate
class Compiler(Generic[P, B]):
    """single entry point combining a parser with a code builder"""
    __slots__ = ("parser", "base_builder")

    parser: P

    base_builder: B

    def __init__(self, parser: P, builder: B) -> None:
        """store the parser and the builder serving as template for every compilation"""
        self.parser, self.base_builder = parser, builder

    def builder(self) -> B:
        """return a fresh copy of the template builder so compilations dont share state"""
        return self.base_builder.copy()

    def compile_str(self, source: str, origin: str = "", loader: Optional[Loader] = None) -> Code:
        """compile source code given as string, the module is always named __main__"""
        target = self.builder()
        self.parser.build(source, target)
        spec = ModuleSpec("__main__", loader, origin=origin, is_package=False)
        return target.code(spec)

    def compile_file(self, file: TextIO, loader: Optional[Loader] = None) -> Code:
        """compile the content of a text stream, skipping a shebang in the first line"""
        target = self.builder()
        head = file.readline()
        rest = file.read()
        if not head.startswith("#!"):
            self.parser.build(head + rest, target)
        else:   # drop the shebang and start with offset 1 to keep line numbers correct
            self.parser.build(rest, target, 1)
        spec = ModuleSpec("__main__", loader, origin=file.name, is_package=False)
        spec.has_location = True
        return target.code(spec)
class Dedenter(CodeBuilderDecorator[B]):
    """decorator which removes a starting indentation from code sections

    The indentation of the first line containing code is removed from every
    line containing code; blank lines and comment-only lines are left as they are.
    NOTE: the section is processed line by line, so the lines of a multi-line
    string literal are checked and dedented like any other line.
    """
    __slots__ = ()

    def __init__(self, builder: B) -> None:
        """construct a instance with the builder to decorate"""
        self.builder = builder

    @staticmethod
    def get_indentation(line: str) -> str:
        """get the leading whitespace of a line of code (may be empty)"""
        # the pattern also matches the empty string, so match() never returns None
        return WHITESPACE_REGEX.match(line).group(0)  # type: ignore

    @staticmethod
    def is_code(line: str) -> bool:
        """check if the line contains code, meaning it is not empty, blank or only a comment"""
        return not (not line or line.isspace() or line.lstrip().startswith("#"))

    def add_code(self, code: str, offset: int) -> None:
        """delegate method call to builder with dedented code

        Raises StartingIndentationError if a line containing code does not
        start with the indentation of the first line containing code. The
        reported line numbers are relative to the whole source (offset included).
        """
        lines = code.splitlines()
        indentation = None
        indentation_lineno = offset + 1     # replaced as soon as the first line of code is found
        for line_num, line in enumerate(lines):
            if self.is_code(line):  # ignore lines without code
                if indentation is None:     # first line of code, set starting indentation
                    indentation = self.get_indentation(line)
                    # remember where the indentation came from, the section may
                    # start with blank or comment lines
                    indentation_lineno = line_num + offset + 1
                if line.startswith(indentation):    # if line starts with starting indentation
                    lines[line_num] = line[len(indentation):]   # remove starting indentation
                else:
                    raise StartingIndentationError(     # raise Exception on bad indentation
                        f"line does not start with the indentation of line {indentation_lineno}",
                        (
                            "",
                            line_num + offset + 1,
                            len(indentation),
                            line
                        )
                    )
        self.builder.add_code("\n".join(lines), offset)     # join the lines back together

    def copy(self) -> Dedenter[B]:
        """copy the dedenter together with a copy of the decorated builder"""
        dedenter = self.__class__.__new__(self.__class__)
        dedenter.builder = self.builder.copy()
        return dedenter
Copyright (C) 2021 Eric Wolf - -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, version 3. - -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -# SPDX-License-Identifier: GPL-3.0-only - -import re -import sys -from io import StringIO -from contextlib import redirect_stdout - - -# class for handling strings -class FromString: - # get string, regex to isolate code and optional flags for the regex (default for processing text files) - # the userdata is given to the processor function to allow state - def __init__(self, string, regex, flags=re.MULTILINE | re.DOTALL, userdata=None): - self.sections = re.split(regex, string, flags=flags) - self.userdata = userdata - - # process string with the code replaced by the output of the processor function - # this will modify self.sections - def process(self, processor): - code_sections = 0 - # the first section is always not code, and every code section has string sections as neighbors - for i in range(1, len(self.sections), 2): - code_sections += 1 - self.sections[i] = processor(self.sections[i], self.userdata) - return code_sections - - # process the string and write the string and replaced code parts to sys.stdout - # this will not modify self.sections an requires an processor to write the data himself - def execute(self, processor): - code_sections = 0 - for i in range(0, len(self.sections)): - code_sections += 1 - if i % 2 == 1: # uneven index --> code - processor(self.sections[i], self.userdata) - else: # even index --> not code - if self.sections[i]: # ignore empthy sections - sys.stdout.write(self.sections[i]) - return code_sections - - def __str__(self): - return "".join(self.sections) - - -# wrapper class for handling presplit strings -class FromIter(FromString): - # get presplit string as iterator - def __init__(self, iterator, 
userdata=None): - self.sections = list(iterator) - self.userdata = userdata - -# function for executing python code -# userdata = [locals, section_number], init with [{}, 0] -def python_execute(code, userdata): - userdata[1] += 1 - try: - exec(python_align(code), globals(), userdata[0]) - except Exception as e: # tell the user the section of the Exception - raise Exception("Exception during execution of section %d" % userdata[1]) from e - -# compile python code sections -# userdata = [file, section_number], init with [str, 0] -def python_compile(code, userdata): - userdata[1] += 1 - try: - return compile(python_align(code), userdata[0], "exec") - except Exception as e: # tell the user the section of the Exception - raise Exception("Exception during executing of section %d" % userdata[1]) from e - -# execute compiled python sections -# userdata is the same as python_execute -def python_execute_compiled(code, userdata): - userdata[1] += 1 - try: - exec(code, globals(), userdata[0]) - except Exception as e: - raise Exception("Exception during executing of section %d" % userdata[1]) from e - -# function for aligning python code in case of a startindentation -def python_align(code, indentation=None): - line_num = 0 - code = code.splitlines() # split to lines - for line in code: - line_num += 1 - if not (not line or line.isspace() or python_is_comment(line)): # ignore non code lines - if indentation is None: # first line of code, get startindentation - indentation = python_get_indentation(line) - if line.startswith(indentation): # if line starts with startindentation - code[line_num - 1] = line[len(indentation):] # remove startindentation - else: - raise IndentationError("indentation not matching", ("embedded code section", line_num, len(indentation), line)) # raise Exception on bad indentation - return "\n".join(code) # join the lines back together - - -# function for getting the indentation of a line of python code -def python_get_indentation(line): - indentation = "" 
- for char in line: - if char in " \t": - indentation += char - else: - break - return indentation - -# check if complete line is a comment -def python_is_comment(line): - return line.lstrip().startswith("#") diff --git a/pyhp/main.py b/pyhp/main.py index e7c602e..6d2971c 100644 --- a/pyhp/main.py +++ b/pyhp/main.py @@ -13,105 +13,141 @@ # SPDX-License-Identifier: GPL-3.0-only import sys -import os +import os.path +import re import argparse import configparser import importlib -import atexit -import errno -from . import __version__ -from . import embed -from . import libpyhp - - -# get cli arguments for main as dict -def get_args(): - parser = argparse.ArgumentParser(prog="pyhp", description="Interpreter for .pyhp Scripts (https://github.com/Deric-W/PyHP)") - parser.add_argument("-c", "--caching", help="enable caching (requires file)", action="store_true") - parser.add_argument("-v", "--version", help="display version number", action="version", version="%(prog)s {version}".format(version=__version__)) - parser.add_argument("file", type=str, help="file to be interpreted (omit for reading from stdin)", nargs="?", default="") - parser.add_argument("--config", type=str, help="path to custom config file", nargs="?", const="/etc/pyhp.conf", default="/etc/pyhp.conf") - args = parser.parse_args() - return {"file_path": args.file, "caching": args.caching, "config_file": args.config} - -# start the PyHP Interpreter with predefined arguments -def main(file_path, caching=False, config_file="/etc/pyhp.conf"): +from typing import TextIO, Any +from . 
import __version__, libpyhp +from .compiler import util, generic, parsers + + +__all__ = ("argparser", "main") + +argparser = argparse.ArgumentParser( + prog="pyhp", + description="Interpreter for .pyhp Scripts (https://github.com/Deric-W/PyHP)" +) +argparser.add_argument( + "-c", "--caching", + help="enable caching (requires file)", + action="store_true" +) +argparser.add_argument( + "-v", "--version", + help="display version number", + action="version", + version=f"%(prog)s {__version__}" +) +argparser.add_argument( + "file", type=argparse.FileType('r'), + help="file to be interpreted (omit for reading from stdin)", + nargs="?", + default=sys.stdin +) +argparser.add_argument( + "--config", + type=str, + help="path to custom config file", + default="/etc/pyhp.conf" +) + + +def main(file: TextIO, caching: bool = False, config_file: str = "/etc/pyhp.conf") -> int: + """start the PyHP Interpreter with predefined arguments""" config = configparser.ConfigParser(inline_comment_prefixes="#") # allow inline comments - if config_file not in config.read(config_file): # reading file failed - raise FileNotFoundError(errno.ENOENT, "failed to read config file", config_file) + with open(config_file, "r") as fd: + config.read_file(fd) # prepare the PyHP Object - PyHP = libpyhp.PyHP(file_path=file_path, - request_order=config.get("request", "request_order", fallback="GET POST COOKIE").split(), - keep_blank_values=config.getboolean("request", "keep_blank_values", fallback=True), - fallback_value=config.get("request", "fallback_value", fallback=""), - enable_post_data_reading=config.getboolean("request", "enable_post_data_reading", fallback=False), - default_mimetype=config.get("request", "default_mimetype", fallback="text/html") - ) - sys.stdout.write = PyHP.make_header_wrapper(sys.stdout.write) # wrap stdout - atexit.register(PyHP.run_shutdown_functions) # run shutdown functions even if a exception occured + PyHP = libpyhp.PyHP( + file_path=file.name, + 
request_order=config.get("request", "request_order", fallback="GET POST COOKIE").split(), + keep_blank_values=config.getboolean("request", "keep_blank_values", fallback=True), + fallback_value=config.get("request", "fallback_value", fallback=""), + enable_post_data_reading=config.getboolean("request", "enable_post_data_reading", fallback=False), + default_mimetype=config.get("request", "default_mimetype", fallback="text/html") + ) + # wrap stdout + sys.stdout.write = PyHP.make_header_wrapper(sys.stdout.write) # type: ignore + + # prepare compiler + parser = parsers.RegexParser( + re.compile( + config.get( + "parser", + "start", + fallback=r"<\?pyhp\s" + ).encode("utf8").decode("unicode_escape") # process escape sequences like \n + ), + re.compile( + config.get( + "parser", + "end", + fallback=r"\s\?>" + ).encode("utf8").decode("unicode_escape") # process escape sequences like \n + ) + ) + builder = generic.GenericCodeBuilder( + config.getint("compiler", "optimization_level", fallback=-1) + ) + compiler = util.Compiler( + parser, + util.Dedenter(builder) if config.getboolean("parser", "dedent", fallback=True) else builder + ) # handle caching - regex = config.get("parser", "regex", fallback="\\<\\?pyhp[\\s](.*?)[\\s]\\?\\>").encode("utf8").decode("unicode_escape") # process escape sequences like \n caching_enabled = config.getboolean("caching", "enable", fallback=True) caching_allowed = config.getboolean("caching", "auto", fallback=False) # if file is not stdin and caching is enabled and wanted or auto_caching is enabled - if check_if_caching(file_path, caching, caching_enabled, caching_allowed): - handler_path = prepare_path(config.get("caching", "handler_path", fallback="/lib/pyhp/cache_handlers/files_mtime.py")) # get neccesary data + if check_if_caching(file, caching, caching_enabled, caching_allowed): + handler_path = config.get( + "caching", + "handler_path", + fallback="/lib/pyhp/cache_handlers/files_mtime.py" + ) # get neccesary data cache_path = 
prepare_path(config.get("caching", "path", fallback="~/.pyhp/cache")) handler = import_path(handler_path) - handler = handler.Handler(cache_path, os.path.abspath(file_path), config["caching"]) # init handler - if handler.is_available(): # check if caching is possible - cached = True - if handler.is_outdated(): # update cache - code = embed.FromString(prepare_file(file_path), regex, userdata=[file_path, 0]) # set userdata for python_compile - code.process(embed.python_compile) # compile python sections - code.userdata = [{"PyHP": PyHP}, 0] # set userdata for python_execute_compiled - handler.save(code.sections) # just save the code sections - else: # load cache - code = embed.FromIter(handler.load(), userdata=[{"PyHP": PyHP}, 0]) - else: # generate FromString Object - cached = False - code = embed.FromString(prepare_file(file_path), regex, userdata=[{"PyHP": PyHP}, 0]) - handler.close() - else: # same as above - cached = False - code = embed.FromString(prepare_file(file_path), regex, userdata=[{"PyHP": PyHP}, 0]) - - if cached: # run compiled code - code.execute(embed.python_execute_compiled) - else: # run normal code - code.execute(embed.python_execute) + with handler.Handler(cache_path, os.path.abspath(file.name), config["caching"]) as handler: + if handler.is_available(): # check if caching is possible + if handler.is_outdated(): # update cache + code = compiler.compile_file(file) + handler.save(code) + else: # load cache + code = handler.load() + else: + code = compiler.compile_file(file) + else: + code = compiler.compile_file(file) + + try: + for text in code.execute({"PyHP": PyHP}): + sys.stdout.write(text) + finally: # run shutdown functions even if a exception occured + PyHP.run_shutdown_functions() if not PyHP.headers_sent(): # prevent error if no output occured, but not if an exception occured PyHP.send_headers() return 0 # return 0 on success -# prepare path for use -def prepare_path(path): + +def prepare_path(path: str) -> str: + """prepare path for 
use""" return os.path.expanduser(path) -# import file at path -def import_path(path): + +def import_path(path: str) -> Any: + """import file at path""" sys.path.insert(0, os.path.dirname(path)) # modify module search path path = os.path.splitext(os.path.basename(path))[0] # get filename without .py - path = importlib.import_module(path) # import module + module = importlib.import_module(path) # import module del sys.path[0] # cleanup module search path - return path + return module -# check we should cache -def check_if_caching(file_path, caching, enabled, auto): - possible = file_path != "" # file is not stdin + +def check_if_caching(file: TextIO, caching: bool, enabled: bool, auto: bool) -> bool: + """check if we should cache""" + possible = file != sys.stdin allowed = (caching or auto) and enabled # if caching is wanted and enabled return possible and allowed - -# get code and remove shebang -def prepare_file(path): - if path == "": - code = sys.stdin.read() - else: - with open(path, "r") as fd: - code = fd.read() - if code.startswith("#!"): # remove shebang - code = code.partition("\n")[2] # get all lines except the first line - return code diff --git a/setup.cfg b/setup.cfg index d4242df..845ee28 100644 --- a/setup.cfg +++ b/setup.cfg @@ -13,11 +13,20 @@ maintainer_email = robo-eric@gmx.de url = https://github.com/Deric-W/PyHP classifiers = Programming Language :: Python :: 3 + Programming Language :: Python :: 3 :: Only + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + Intended Audience :: Developers + Operating System :: Microsoft :: Windows + Operating System :: POSIX + Topic :: Internet :: WWW/HTTP + Topic :: Internet :: WWW/HTTP :: Dynamic Content + Topic :: Internet :: WWW/HTTP :: Dynamic Content :: CGI Tools/Libraries License :: OSI Approved :: GNU General Public License v3 (GPLv3) [options] -packages = pyhp -python_requires = >=3.5 +packages = pyhp, pyhp.compiler +python_requires = >=3.7 [coverage:run] source = pyhp @@ 
-25,7 +34,16 @@ omit = tests/* [mypy] disallow_redefinition = True -warn-unreachable = True -warn-redundant-casts = True +warn_unreachable = True +warn_redundant_casts = True warn_unused_configs = True -disallow-subclassing-any = True +warn_unused_ignores = True +disallow_subclassing_any = True +disallow_any_generics = True +ignore_missing_imports = True +no_implicit_reexport = True +check_untyped_defs = True +disallow_untyped_defs = True + +[mypy-pyhp.libpyhp] +ignore_errors = True diff --git a/tests/compiler/__init__.py b/tests/compiler/__init__.py new file mode 100644 index 0000000..1d4c0d7 --- /dev/null +++ b/tests/compiler/__init__.py @@ -0,0 +1,90 @@ +#!/usr/bin/python3 + +"""Unit tests for the abc""" + +from __future__ import annotations +import unittest +import unittest.mock +from copy import deepcopy +from typing import Iterator, Tuple, Sequence +from importlib.machinery import ModuleSpec +from pyhp.compiler import CodeBuilder, CodeBuilderDecorator, Parser + + +class PointlessDecorator(CodeBuilderDecorator[CodeBuilder]): + def __init__(self, builder: CodeBuilder) -> None: + self.builder = builder + + def copy(self) -> PointlessDecorator: + return self.__class__(self.builder.copy()) + + +class PointlessParser(Parser): + parse_result: Sequence[Tuple[str, int, bool]] + + def __init__(self, parse_result: Sequence[Tuple[str, int, bool]]) -> None: + self.parse_result = parse_result + + def parse(self, source: str, line_offset: int = 0) -> Iterator[Tuple[str, int, bool]]: + return iter(self.parse_result) + + +class TestCodeBuilderDecorator(unittest.TestCase): + """test the CodeBuilderDecorator abc""" + def test_delegate(self) -> None: + """test if the default implementation delegates the calls""" + builder = unittest.mock.Mock() + decorator = PointlessDecorator(builder) + decorator.add_code("code", 0) + decorator.add_text("text", 0) + spec = ModuleSpec("test", None, origin="", is_package=False) + decorator.code(spec) + builder.add_code.assert_called_with("code", 
0) + builder.add_text.assert_called_with("text", 0) + builder.code.assert_called_with(spec) + + def test_detach(self) -> None: + """test the default detach implementation""" + builder = unittest.mock.Mock() + decorator = PointlessDecorator(builder) + self.assertIs(decorator.detach(), builder) + + def test_deepcopy(self) -> None: + """test the deepcopy implementation""" + builder = unittest.mock.Mock() + decorator = PointlessDecorator(builder) + copy1 = decorator.copy() + copy2 = deepcopy(decorator) + self.assertIsInstance(copy1, PointlessDecorator) + self.assertIsInstance(copy2, PointlessDecorator) + self.assertIsNot(copy1, decorator) + self.assertIsNot(copy2, decorator) + self.assertIsNot(copy1.builder, builder) + self.assertIsNot(copy2.builder, builder) + + +class TestParser(unittest.TestCase): + """test the Parser abc""" + def test_build(self) -> None: + """test Parser.build""" + parser = PointlessParser( + [ + ("text0", 0, False), + ("code0", 0, True), + ("text1", 1, False), + ("code1", 2, True), + ("code2", 2, True) + ] + ) + builder = unittest.mock.Mock() + parser.build("", builder) + self.assertEqual( + builder.method_calls, + [ + unittest.mock.call.add_text("text0", 0), + unittest.mock.call.add_code("code0", 0), + unittest.mock.call.add_text("text1", 1), + unittest.mock.call.add_code("code1", 2), + unittest.mock.call.add_code("code2", 2) + ] + ) diff --git a/tests/compiler/test_bytecode.py b/tests/compiler/test_bytecode.py new file mode 100644 index 0000000..a13341c --- /dev/null +++ b/tests/compiler/test_bytecode.py @@ -0,0 +1,153 @@ +#!/usr/bin/python3 + +"""Unit tests for the bytecode code implementation""" + +import unittest +import sys +import ast +import pickle +from importlib.machinery import ModuleSpec +from pyhp.compiler import bytecode + + +TEST_AST = ast.Module( + body=[ + ast.FunctionDef( + name="", + args=ast.arguments( + posonlyargs=[], + args=[], + kwonlyargs=[], + kw_defaults=[], + defaults=[] + ), + body=ast.parse( + "def test():\n yield 
'1'\n numbers.append('2')\n numbers.append('3')\n yield '4'", + mode="exec" + ).body[0].body, + decorator_list=[], + lineno=1, + col_offset=0 + ) + ], + type_ignores=[] +) + + +class TestCode(unittest.TestCase): + """Test the bytecode code implementation""" + def test_constants(self) -> None: + """test the module level constants""" + spec = ModuleSpec("test", None, origin="this test", is_package=False) + code = bytecode.ByteCode(compile(TEST_AST, spec.origin, "exec"), spec) + variables = {"numbers": []} + list(code.execute(variables)) # modifies variables + variables.pop("") + variables.pop("numbers") + variables.pop("__builtins__") + self.assertEqual( + variables, + { + "__name__": "test", + "__loader__": None, + "__file__": "this test", + "__path__": None, + "__package__": "", + "__cached__": None, + "__spec__": spec + } + ) + + def test_execute(self) -> None: + """test the execution""" + spec = ModuleSpec("test", None, origin="this test", is_package=False) + code = bytecode.ByteCode(compile(TEST_AST, spec.origin, "exec"), spec) + numbers = [] + variables = {"numbers": numbers} + for number in code.execute(variables): + numbers.append(number) + self.assertEqual(numbers, ["1", "2", "3", "4"]) + + def test_pickle(self) -> None: + """test if generic code objects are pickleable""" + spec = ModuleSpec("test", None, origin="this test", is_package=False) + code = bytecode.ByteCode(compile(TEST_AST, spec.origin, "exec"), spec) + code2 = pickle.loads(pickle.dumps(code)) + self.assertEqual(code2.code, code.code) + self.assertEqual(code2.spec, code.spec) + + def test_equal(self) -> None: + """test if equality between generic code objetcs works""" + builder = bytecode.ByteCodeBuilder(-1) + builder.add_code("print(1)", 0) + builder.add_text("X", 0) + spec = ModuleSpec("test", None, origin="this test", is_package=False) + code = builder.code(spec) + code2 = builder.code(spec) + code3 = builder.code(ModuleSpec("X", None, origin="this test", is_package=False)) + 
builder.add_text("Y", 0) + code4 = builder.code(spec) + self.assertEqual(code, code2) + self.assertNotEqual(code, code3) + self.assertNotEqual(code, code4) + + +class TestBuilder(unittest.TestCase): + """Test the generic code builder""" + def test_build(self) -> None: + """test the building of a generic code object""" + builder = bytecode.ByteCodeBuilder(-1) + builder.add_text("1", 0) + builder.add_code("numbers.append('2')", 1) + builder.add_code("numbers.append('3')", 2) + builder.add_text("4", 3) + spec = ModuleSpec("test", None, origin="this test", is_package=False) + code = builder.code(spec) + code2 = bytecode.ByteCode( + compile(TEST_AST, spec.origin, "exec"), + spec + ) + self.assertEqual(code, code2) + + def test_copy(self) -> None: + """test GenericCodeBuilder.copy""" + builder = bytecode.ByteCodeBuilder(-1) + builder.add_text("1", 0) + builder.add_code("numbers.append('2')", 1) + builder.add_code("numbers.append('3')", 2) + builder.add_text("4", 3) + builder2 = builder.copy() + self.assertEqual(builder.nodes, builder2.nodes) + builder2.add_text("test", 3) + self.assertNotEqual(builder.nodes, builder2.nodes) + + def test_empty(self) -> None: + """test if an empty builder works""" + builder = bytecode.ByteCodeBuilder(-1) + code = builder.code(ModuleSpec("test", None, origin="this test", is_package=False)) + self.assertEqual(list(code.execute({})), []) + + def test_lineno(self) -> None: + """test if line numbers are set correctly""" + builder = bytecode.ByteCodeBuilder(-1) + builder.add_code("x", 99) # offset starts with 0 + spec = ModuleSpec("test", None, origin="this test", is_package=False) + code = builder.code(spec) + try: + list(code.execute({})) + except NameError: + _, _, traceback = sys.exc_info() + self.assertEqual(traceback.tb_next.tb_frame.f_code.co_filename, spec.origin) + self.assertEqual(traceback.tb_next.tb_frame.f_lineno, 100) + else: + raise RuntimeError("bad bytecode executed without error") + + def test_error_lineno(self) -> None: + 
"""test if the line numbers of syntax errors are correct""" + builder = bytecode.ByteCodeBuilder(-1) + try: + builder.add_code("9***9", 99) # offset starts with 0 + except SyntaxError as e: + self.assertEqual(e.lineno, 100) + else: + raise RuntimeError("bad syntax compiled without error") diff --git a/tests/compiler/test_generic.py b/tests/compiler/test_generic.py new file mode 100644 index 0000000..f7983c3 --- /dev/null +++ b/tests/compiler/test_generic.py @@ -0,0 +1,182 @@ +#!/usr/bin/python3 + +"""Unit tests for the generic code implementation""" + +import unittest +import sys +import pickle +from importlib.machinery import ModuleSpec +from pyhp.compiler import generic + + +class TestCode(unittest.TestCase): + """Test the generic code implementation""" + def test_constants(self) -> None: + """test the module level constants""" + spec = ModuleSpec("test", None, origin="this test", is_package=False) + code = generic.GenericCode([], spec) + variables = {} + list(code.execute(variables)) # modifies variables + self.assertEqual( + variables, + { + "__name__": "test", + "__loader__": None, + "__file__": "this test", + "__path__": None, + "__package__": "", + "__cached__": None, + "__spec__": spec + } + ) + + def test_execute(self) -> None: + """test the execution""" + code = generic.GenericCode( + [ + "1", + "2", + compile("numbers.append('3')", "", "exec"), + "4", + compile("numbers.append('5')", "", "exec"), + ], + ModuleSpec("__main__", None, origin=None, is_package=False) + ) + numbers = [] + variables = {"numbers": numbers} + for number in code.execute(variables): + numbers.append(number) + self.assertEqual(numbers, ["1", "2", "3", "4", "5"]) + + def test_pickle(self) -> None: + """test if generic code objects are pickleable""" + code = generic.GenericCode( + [ + "1", + "2", + compile("numbers.append('3')", "", "exec"), + "4", + compile("numbers.append('5')", "", "exec"), + ], + ModuleSpec("test", None, origin="this test", is_package=False) + ) + code2 = 
pickle.loads(pickle.dumps(code)) + self.assertEqual(code2.sections, code.sections) + self.assertEqual(code2.spec, code.spec) + + def test_equal(self) -> None: + """test if equality between generic code objetcs works""" + code = generic.GenericCode( + [ + "1", + "2", + compile("numbers.append('3')", "", "exec"), + "4", + compile("numbers.append('5')", "", "exec"), + ], + ModuleSpec("test", None, origin="this test", is_package=False) + ) + + code2 = generic.GenericCode( + [ + "1", + "2", + compile("numbers.append('3')", "", "exec"), + "4", + compile("numbers.append('5')", "", "exec"), + ], + ModuleSpec("test", None, origin="this test", is_package=False) + ) + + code3 = generic.GenericCode( + [ + "X", + "2", + compile("numbers.append('3')", "", "exec"), + "4", + compile("numbers.append('5')", "", "exec"), + ], + ModuleSpec("test", None, origin="this test", is_package=False) + ) + + code4 = generic.GenericCode( + [ + "1", + "2", + compile("numbers.append('3')", "", "exec"), + "4", + compile("numbers.append('5')", "", "exec"), + ], + ModuleSpec("X", None, origin="this test", is_package=False) + ) + + self.assertEqual(code, code2) + self.assertNotEqual(code, code3) + self.assertNotEqual(code, code4) + self.assertNotEqual(code3, code4) + + +class TestBuilder(unittest.TestCase): + """Test the generic code builder""" + def test_build(self) -> None: + """test the building of a generic code object""" + builder = generic.GenericCodeBuilder(-1) + builder.add_text("1", 0) + builder.add_code("numbers.append('2')", 0) + builder.add_code("numbers.append('3')", 0) + builder.add_text("4", 0) + spec = ModuleSpec("test", None, origin="this test", is_package=False) + code = builder.code(spec) + code2 = generic.GenericCode( + ( + "1", + compile("numbers.append('2')", "this test", "exec"), + compile("numbers.append('3')", "this test", "exec"), + "4" + ), + spec + ) + self.assertEqual(code, code2) + + def test_copy(self) -> None: + """test GenericCodeBuilder.copy""" + builder = 
generic.GenericCodeBuilder(-1) + builder.add_text("1", 0) + builder.add_code("numbers.append('2')", 0) + builder.add_code("numbers.append('3')", 0) + builder.add_text("4", 0) + builder2 = builder.copy() + self.assertEqual(builder.sections, builder2.sections) + builder2.add_text("test", 0) + self.assertNotEqual(builder.sections, builder2.sections) + + def test_empty(self) -> None: + """test if an empty builder works""" + builder = generic.GenericCodeBuilder(-1) + code = builder.code(ModuleSpec("test", None, origin="this test", is_package=False)) + self.assertEqual(list(code.execute({})), []) + + def test_lineno(self) -> None: + """test if line numbers are set correctly""" + builder = generic.GenericCodeBuilder(-1) + builder.add_code("x", 99) # offset starts with 0 + spec = ModuleSpec("test", None, origin="this test", is_package=False) + code = builder.code(spec) + try: + list(code.execute({})) + except NameError: + _, _, traceback = sys.exc_info() + self.assertEqual(traceback.tb_next.tb_next.tb_frame.f_code.co_filename, spec.origin) + self.assertEqual(traceback.tb_next.tb_next.tb_frame.f_lineno, 100) + else: + raise RuntimeError("bad generic code executed without error") + + def test_error_lineno(self) -> None: + """test if the line numbers of syntax errors are correct""" + builder = generic.GenericCodeBuilder(-1) + try: + builder.add_code("9***9", 99) # offset starts with 0 + except SyntaxError as e: + self.assertEqual(e.lineno, 100) + else: + raise RuntimeError("bad syntax compiled without error") diff --git a/tests/compiler/test_parsers.py b/tests/compiler/test_parsers.py new file mode 100644 index 0000000..59dc2c9 --- /dev/null +++ b/tests/compiler/test_parsers.py @@ -0,0 +1,51 @@ +#!/usr/bin/python3 + +"""Unit tests for the compiler.parsers module""" + +import re +import unittest +from pyhp.compiler import parsers + + +class TestRegexParser(unittest.TestCase): + """Test the regex parser implementation""" + parser = parsers.RegexParser(re.compile("{"), 
re.compile("}")) + + def test_syntax(self) -> None: + """test basic syntax""" + sections = list(self.parser.parse("text1{code1}\n{code2}{\ncode3\n}text3\n")) + self.assertEqual( + sections, + [ + ("text1", 0, False), + ("code1", 0, True), + ("\n", 0, False), + ("code2", 1, True), + ("", 1, False), + ("\ncode3\n", 1, True), + ("text3\n", 3, False) + ] + ) + + def test_missing_end(self) -> None: + """test behavior on missing end of code section""" + sections = list(self.parser.parse("text1{code1")) + self.assertEqual( + sections, + [ + ("text1", 0, False), + ("code1", 0, True) + ] + ) + + def test_code_start(self) -> None: + """test behavior on starting code section""" + sections = list(self.parser.parse("{code1}text1")) + self.assertEqual( + sections, + [ + ("", 0, False), + ("code1", 0, True), + ("text1", 0, False) + ] + ) diff --git a/tests/compiler/test_util.py b/tests/compiler/test_util.py new file mode 100644 index 0000000..8db1153 --- /dev/null +++ b/tests/compiler/test_util.py @@ -0,0 +1,120 @@ +#!/usr/bin/python3 + +"""Unit tests for the utilities""" + +import re +import unittest +import unittest.mock +from importlib.machinery import ModuleSpec +from pyhp.compiler import util, parsers, generic + + +class TestCompiler(unittest.TestCase): + """Test the compiler facade""" + + compiler = util.Compiler( + parsers.RegexParser(re.compile(r"\<\?pyhp\s"), re.compile(r"\s\?\>")), + generic.GenericCodeBuilder(-1) + ) + + def test_builder(self) -> None: + """test if the builder is independent from the compiler""" + self.assertFalse(self.compiler.builder() is self.compiler.base_builder) + + def test_string(self) -> None: + """test the compilation of strings""" + source = "text1text2" + code = self.compiler.compile_str(source, "Test") + builder = self.compiler.builder() + self.compiler.parser.build(source, builder) + code2 = builder.code(ModuleSpec("__main__", None, origin="Test", is_package=False)) + self.assertEqual(code, code2) + + def test_file(self) -> None: + 
"""test the compilation of files""" + path = "./tests/embedding/syntax.pyhp" + with open(path, "r") as file: + code = self.compiler.compile_file(file) + + builder = self.compiler.builder() + spec = ModuleSpec("__main__", None, origin=path, is_package=False) + spec.has_location = True + with open(path, "r") as file: + self.compiler.parser.build(file.read(), builder) + self.assertEqual( + code, + builder.code(spec) + ) + + def test_shebang(self) -> None: + """test the handling of shebangs""" + path = "./tests/embedding/shebang.pyhp" + with open(path, "r") as file: + code = self.compiler.compile_file(file) + builder = self.compiler.builder() + spec = ModuleSpec("__main__", None, origin=path, is_package=False) + spec.has_location = True + with open(path, "r") as file: + file.readline() # discard shebang + self.compiler.parser.build(file.read(), builder, 1) + self.assertEqual( + code, + builder.code(spec) + ) + + +class TestDedenter(unittest.TestCase): + """Test the dedenting decorator""" + def test_get_indentation(self) -> None: + """test the detection of indentation""" + self.assertEqual(" \t\t ", util.Dedenter.get_indentation(" \t\t ")) + self.assertEqual("", util.Dedenter.get_indentation("test")) + self.assertEqual("", util.Dedenter.get_indentation("test \t")) + self.assertEqual("", util.Dedenter.get_indentation("")) + self.assertEqual(" ", util.Dedenter.get_indentation(" X \n")) + + def test_dedent(self) -> None: + """test the dedentation process""" + builder = unittest.mock.Mock() + dedenter = util.Dedenter(builder) + dedenter.add_code("test", 0) # no indent + dedenter.add_code(" test", 0) # simple indent + dedenter.add_code(" test\n test", 0) + dedenter.add_code("\t\t#test\ntest\ntest", 0) # ignore non code lines + dedenter.add_code("\t\t#test\n test\n test", 0) + dedenter.add_code("\t\t\ntest\ntest", 0) + dedenter.add_code("\ntest", 0) + builder.add_code.assert_has_calls( + ( + unittest.mock.call("test", 0), + unittest.mock.call("test", 0), + 
unittest.mock.call("test\ntest", 0), + unittest.mock.call("\t\t#test\ntest\ntest", 0), + unittest.mock.call("\t\t#test\ntest\ntest", 0), + unittest.mock.call("\t\t\ntest\ntest", 0), + unittest.mock.call("\ntest", 0) + ), + any_order=False + ) + with self.assertRaises(util.StartingIndentationError): + dedenter.add_code("\ttest\ntest", 0) # test bad indentation + + def test_copy(self) -> None: + """test Dedenter.copy""" + builder = unittest.mock.Mock() + dedenter = util.Dedenter(builder) + dedenter2 = dedenter.copy() + self.assertFalse(dedenter2 is dedenter) + self.assertFalse(dedenter2.builder is builder) + dedenter.add_text("test", 0) + builder.add_text.assert_called_with("test", 0) + dedenter2.builder.add_text.assert_not_called() + + def test_is_code(self) -> None: + """test Dedenter.is_code""" + self.assertTrue(util.Dedenter.is_code("test")) + self.assertTrue(util.Dedenter.is_code(" test")) + self.assertFalse(util.Dedenter.is_code("#test")) + self.assertFalse(util.Dedenter.is_code(" #test")) + self.assertFalse(util.Dedenter.is_code("")) + self.assertFalse(util.Dedenter.is_code("\t\t \n")) diff --git a/tests/embedding/indentation.output b/tests/embedding/indentation.output index 3b860e4..271b9a8 100644 --- a/tests/embedding/indentation.output +++ b/tests/embedding/indentation.output @@ -11,7 +11,7 @@ But it looks a bit odd inside of an HTML document. 
Because of this, PyHP will calculate the starting indentation of each section and remove it from all lines of it (12 spaces in this case) -If a line is not starting with the starting indentation a IndentationError will be raised +If a line is not starting with the starting indentation a StartingIndentationError will be raised diff --git a/tests/embedding/indentation.pyhp b/tests/embedding/indentation.pyhp index 1e4b32e..5d24f5f 100644 --- a/tests/embedding/indentation.pyhp +++ b/tests/embedding/indentation.pyhp @@ -13,7 +13,7 @@ print("But it looks a bit odd inside of an HTML document.") print("Because of this, PyHP will calculate the starting indentation of each section") print("and remove it from all lines of it (12 spaces in this case)") - print("If a line is not starting with the starting indentation a IndentationError will be raised") + print("If a line is not starting with the starting indentation a StartingIndentationError will be raised") # comments and empthy lines are ignored by this feature # the tags also dont need to be on the same level ?> diff --git a/tests/embedding/syntax.pyhp b/tests/embedding/syntax.pyhp index a47c5c4..e8be197 100644 --- a/tests/embedding/syntax.pyhp +++ b/tests/embedding/syntax.pyhp @@ -4,12 +4,12 @@ ' tags and one whitespace between") - print("the code and the tags.") - print("Dont forget that the parser ignores python syntax, so '?>' without the ' would") - print("end this section.") +print("basic synatx test") +print("With the default configuration, code needs to be contained") +print("between the '' tags and one whitespace between") +print("the code and the tags.") +print("Dont forget that the parser ignores python syntax, so '?>' without the ' would") +print("end this section.") ?> diff --git a/tests/header/header.output b/tests/header/header.output index 2a7a6cf..e27e2a7 100644 --- a/tests/header/header.output +++ b/tests/header/header.output @@ -14,9 +14,8 @@ not_send: True header - diff --git a/tests/header/header.pyhp 
b/tests/header/header.pyhp index 882fde3..c97e4e4 100644 --- a/tests/header/header.pyhp +++ b/tests/header/header.pyhp @@ -21,5 +21,6 @@ print("This is an example use of the header function.") print("Because of already occured output, the next line will have no effect.") PyHP.header("pointless: True", http_response_code=400) + ?> diff --git a/tests/test_main.py b/tests/test_main.py index 0f92ea2..b48e53b 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -222,38 +222,19 @@ class CheckInternals(unittest.TestCase): """check internal functions""" def test_check_caching(self) -> None: """test the caching detection""" - self.assertTrue(main.check_if_caching("test", True, True, True)) - self.assertTrue(main.check_if_caching("test", True, True, False)) - self.assertTrue(main.check_if_caching("test", False, True, True)) - self.assertTrue(main.check_if_caching("test", True, True, False)) - self.assertFalse(main.check_if_caching("", True, True, True)) - self.assertFalse(main.check_if_caching("", True, True, False)) - self.assertFalse(main.check_if_caching("", False, True, True)) - self.assertFalse(main.check_if_caching("", True, True, False)) - self.assertFalse(main.check_if_caching("test", True, False, True)) - self.assertFalse(main.check_if_caching("test", True, False, False)) - self.assertFalse(main.check_if_caching("test", False, False, True)) - self.assertFalse(main.check_if_caching("test", False, True, False)) - - def test_prepare_file(self) -> None: - """test the code retrieval""" - file = NamedTemporaryFile("w+", delete=False) - try: - file.write("Test") - file.close() - self.assertEqual(main.prepare_file(file.name), "Test") - finally: - os.unlink(file.name) - - def test_prepare_file_shebang(self) -> None: - """test code tetrieval of files with a shebang""" - file = NamedTemporaryFile("w+", delete=False) - try: - file.write("#!Test\nTest\nTest") - file.close() - self.assertEqual(main.prepare_file(file.name), "Test\nTest") - finally: - os.unlink(file.name) + 
with open(os.devnull, "r") as fd: + self.assertTrue(main.check_if_caching(fd, True, True, True)) + self.assertTrue(main.check_if_caching(fd, True, True, False)) + self.assertTrue(main.check_if_caching(fd, False, True, True)) + self.assertTrue(main.check_if_caching(fd, True, True, False)) + self.assertFalse(main.check_if_caching(sys.stdin, True, True, True)) + self.assertFalse(main.check_if_caching(sys.stdin, True, True, False)) + self.assertFalse(main.check_if_caching(sys.stdin, False, True, True)) + self.assertFalse(main.check_if_caching(sys.stdin, True, True, False)) + self.assertFalse(main.check_if_caching(fd, True, False, True)) + self.assertFalse(main.check_if_caching(fd, True, False, False)) + self.assertFalse(main.check_if_caching(fd, False, False, True)) + self.assertFalse(main.check_if_caching(fd, False, True, False)) def test_import_path(self) -> None: """test the importing of files"""