From 287ed388a68e2013dab29f17e9f470c279645d3a Mon Sep 17 00:00:00 2001 From: Jack McCluskey <34928439+jrmccluskey@users.noreply.github.com> Date: Wed, 8 May 2024 20:29:16 -0400 Subject: [PATCH] Fix trivial inference tests for Python 3.12 support (#31170) * Fix trivial inference for Python 3.12 support * remove debugging prints * Address comments * add unit test for intrinsic op order * linting * add unittest.main() * suggestions --- .../typehints/intrinsic_one_ops.py | 100 ++++++++++++++++++ .../typehints/intrinsic_one_ops_test.py | 40 +++++++ sdks/python/apache_beam/typehints/opcodes.py | 65 ++++++++++++ .../typehints/trivial_inference.py | 30 +++++- .../typehints/trivial_inference_test.py | 1 + 5 files changed, 235 insertions(+), 1 deletion(-) create mode 100644 sdks/python/apache_beam/typehints/intrinsic_one_ops.py create mode 100644 sdks/python/apache_beam/typehints/intrinsic_one_ops_test.py diff --git a/sdks/python/apache_beam/typehints/intrinsic_one_ops.py b/sdks/python/apache_beam/typehints/intrinsic_one_ops.py new file mode 100644 index 000000000000..c82141cb404f --- /dev/null +++ b/sdks/python/apache_beam/typehints/intrinsic_one_ops.py @@ -0,0 +1,100 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Defines the actions intrinsic bytecodes have on the frame. + +Each function here corresponds to a bytecode documented in +https://docs.python.org/3/library/dis.html . The first argument is a (mutable) +FrameState object, the second the integer opcode argument. + +Bytecodes with more complicated behavior (e.g. modifying the program counter) +are handled inline rather than here. + +For internal use only; no backwards-compatibility guarantees. +""" +# pytype: skip-file + +from . import opcodes + + +def intrinsic_1_invalid(state, arg): + pass + + +def intrinsic_print(state, arg): + pass + + +def intrinsic_import_star(state, arg): + pass + + +def intrinsic_stopiteration_error(state, arg): + pass + + +def intrinsic_async_gen_wrap(state, arg): + pass + + +def intrinsic_unary_positive(state, arg): + opcodes.unary_positive(state, arg) + pass + + +def intrinsic_list_to_tuple(state, arg): + opcodes.list_to_tuple(state, arg) + pass + + +def intrinsic_typevar(state, arg): + pass + + +def intrinsic_paramspec(state, arg): + pass + + +def intrinsic_typevartuple(state, arg): + pass + + +def intrinsic_subscript_generic(state, arg): + pass + + +def intrinsic_typealias(state, arg): + pass + + +# The order of operations in the table of the intrinsic one operations is +# defined in https://docs.python.org/3/library/dis.html#opcode-CALL_INTRINSIC_1 +# and may change between minor versions. +INT_ONE_OPS = tuple([ + intrinsic_1_invalid, + intrinsic_print, + intrinsic_import_star, + intrinsic_stopiteration_error, + intrinsic_async_gen_wrap, + intrinsic_unary_positive, + intrinsic_list_to_tuple, + intrinsic_typevar, + intrinsic_paramspec, + intrinsic_typevartuple, + intrinsic_subscript_generic, + intrinsic_typealias +]) diff --git a/sdks/python/apache_beam/typehints/intrinsic_one_ops_test.py b/sdks/python/apache_beam/typehints/intrinsic_one_ops_test.py new file mode 100644 index 000000000000..8291ef2df5bb --- /dev/null +++ b/sdks/python/apache_beam/typehints/intrinsic_one_ops_test.py @@ -0,0 +1,40 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Tests for apache_beam.typehints.intrinsic_one_ops.""" + +# pytype: skip-file + +import dis +import sys +import unittest + +from apache_beam.typehints import intrinsic_one_ops + + +class IntrinsicOneOpsTest(unittest.TestCase): + def test_unary_intrinsic_ops_are_in_the_same_order_as_in_cpython(self): + if sys.version_info >= (3, 12): + dis_order = dis.__dict__['_intrinsic_1_descs'] + beam_ops = [fn.__name_upper() for fn in intrinsic_one_ops.INT_ONE_OPS] + for fn in intrinsic_one_ops.INT_ONE_OPS: + beam_ops.append(fn.__name__.upper()) + self.assertListEqual(dis_order, beam_ops) + + +if __name__ == '__main__': + unittest.main() diff --git a/sdks/python/apache_beam/typehints/opcodes.py b/sdks/python/apache_beam/typehints/opcodes.py index 5a35b56b9321..11e607543e93 100644 --- a/sdks/python/apache_beam/typehints/opcodes.py +++ b/sdks/python/apache_beam/typehints/opcodes.py @@ -78,10 +78,26 @@ def nop(unused_state, unused_arg): pass +resume = nop + + def pop_top(state, unused_arg): state.stack.pop() +def end_for(state, unused_arg): + state.stack.pop() + state.stack.pop() + + +def end_send(state, unused_arg): + del state.stack[-2] + + +def copy(state, arg): + state.stack.append(state.stack[-arg]) + + def rot_n(state, n): state.stack[-n:] = [state.stack[-1]] + state.stack[-n:-1] @@ -188,6 +204,31 @@ def store_subscr(unused_state, unused_args): pass +def binary_slice(state, args): + _ = state.stack.pop() + _ = state.stack.pop() + base = Const.unwrap(state.stack.pop()) + if base is str: + out = base + elif isinstance(base, typehints.IndexableTypeConstraint): + out = base + else: + out = element_type(base) + state.stack.append(out) + + +def store_slice(state, args): + """Clears elements off the stack like it was constructing a + container, but leaves the container type back at stack[-1] + since that's all that is relevant for type checking. + """ + _ = state.stack.pop() # End + _ = state.stack.pop() # Start + container = state.stack.pop() # Container type + _ = state.stack.pop() # Values that would go in container + state.stack.append(container) + + print_item = pop_top print_newline = nop @@ -347,6 +388,14 @@ def load_attr(state, arg): Will replace with Any for builtin methods, but these don't have bytecode in CPython so that's okay. """ + if (sys.version_info.major, sys.version_info.minor) >= (3, 12): + # Load attribute's arg was bit-shifted in 3.12 to also allow for + # adding extra information to the stack based on the lower byte, + # so we have to adjust it back. + # + # See https://docs.python.org/3/library/dis.html#opcode-LOAD_ATTR + # for more information. + arg = arg >> 1 o = state.stack.pop() name = state.get_name(arg) state.stack.append(_getattr(o, name)) @@ -417,6 +466,14 @@ def load_fast(state, arg): state.stack.append(state.vars[arg]) +load_fast_check = load_fast + + +def load_fast_and_clear(state, arg): + state.stack.append(state.vars[arg]) + del state.vars[arg] + + def store_fast(state, arg): state.vars[arg] = state.stack.pop() @@ -425,6 +482,14 @@ def delete_fast(state, arg): state.vars[arg] = Any # really an error +def swap(state, arg): + state.stack[-arg], state.stack[-1] = state.stack[-1], state.stack[-arg] + + +def reraise(state, arg): + pass + + # bpo-43683 Adds GEN_START in Python 3.10, but removed in Python 3.11 # https://github.com/python/cpython/pull/25138 def gen_start(state, arg): diff --git a/sdks/python/apache_beam/typehints/trivial_inference.py b/sdks/python/apache_beam/typehints/trivial_inference.py index a880b5c70ea1..8b6f43abaa83 100644 --- a/sdks/python/apache_beam/typehints/trivial_inference.py +++ b/sdks/python/apache_beam/typehints/trivial_inference.py @@ -359,6 +359,7 @@ def infer_return_type_func(f, input_types, debug=False, depth=0): dis.dis(f) from . import opcodes simple_ops = dict((k.upper(), v) for k, v in opcodes.__dict__.items()) + from . import intrinsic_one_ops co = f.__code__ code = co.co_code @@ -432,6 +433,10 @@ def infer_return_type_func(f, input_types, debug=False, depth=0): elif op in dis.haslocal: print('(' + co.co_varnames[arg] + ')', end=' ') elif op in dis.hascompare: + if (sys.version_info.major, sys.version_info.minor) >= (3, 12): + # In 3.12 this arg was bit-shifted. Shifting it back avoids an + # out-of-index. + arg = arg >> 4 print('(' + dis.cmp_op[arg] + ')', end=' ') elif op in dis.hasfree: if free is None: @@ -578,7 +583,13 @@ def infer_return_type_func(f, input_types, debug=False, depth=0): state = None elif opname in ('POP_JUMP_IF_TRUE', 'POP_JUMP_IF_FALSE'): state.stack.pop() - jmp = arg * jump_multiplier + # The arg was changed to be a relative delta instead of an absolute + # in 3.11, and became a full instruction instead of a + # pseudo-instruction in 3.12 + if (sys.version_info.major, sys.version_info.minor) >= (3, 12): + jmp = pc + arg * jump_multiplier + else: + jmp = arg * jump_multiplier jmp_state = state.copy() elif opname in ('POP_JUMP_FORWARD_IF_TRUE', 'POP_JUMP_FORWARD_IF_FALSE'): state.stack.pop() @@ -608,6 +619,10 @@ def infer_return_type_func(f, input_types, debug=False, depth=0): state.stack.pop() elif opname == 'FOR_ITER': jmp = pc + arg * jump_multiplier + if sys.version_info >= (3, 12): + # The jump is relative to the next instruction after a cache call, + # so jump 4 more bytes. + jmp += 4 jmp_state = state.copy() jmp_state.stack.pop() state.stack.append(element_type(state.stack[-1])) @@ -641,6 +656,19 @@ def infer_return_type_func(f, input_types, debug=False, depth=0): # No-op introduced in 3.11. Without handling this some # instructions have functionally > 2 byte size. pass + elif opname == 'RETURN_CONST': + # Introduced in 3.12. Handles returning constants directly + # instead of having a LOAD_CONST before a RETURN_VALUE. + returns.add(state.const_type(arg)) + state = None + elif opname == 'CALL_INTRINSIC_1': + # Introduced in 3.12. The arg is an index into a table of + # operations reproduced in INT_ONE_OPS. Not all ops are + # relevant for our type checking infrastructure. + int_op = intrinsic_one_ops.INT_ONE_OPS[arg] + if debug: + print("Executing intrinsic one op", int_op.__name__.upper()) + int_op(state, arg) else: raise TypeInferenceError('unable to handle %s' % opname) diff --git a/sdks/python/apache_beam/typehints/trivial_inference_test.py b/sdks/python/apache_beam/typehints/trivial_inference_test.py index 4341d11d3604..48ccc8a6a2ed 100644 --- a/sdks/python/apache_beam/typehints/trivial_inference_test.py +++ b/sdks/python/apache_beam/typehints/trivial_inference_test.py @@ -206,6 +206,7 @@ def testTupleListComprehension(self): self.assertReturnType( typehints.List[int], lambda xs: [x for x in xs], [typehints.Tuple[int, int, int]]) + self.assertReturnType( typehints.List[typehints.Union[int, float]], lambda xs: [x for x in xs], [typehints.Tuple[int, float]])