Skip to content

Commit

Permalink
Fix trivial inference tests for Python 3.12 support (#31170)
Browse files Browse the repository at this point in the history
* Fix trivial inference for Python 3.12 support

* remove debugging prints

* Address comments

* add unit test for intrinsic op order

* linting

* add unittest.main()

* suggestions
  • Loading branch information
jrmccluskey authored May 9, 2024
1 parent 6197657 commit 287ed38
Show file tree
Hide file tree
Showing 5 changed files with 235 additions and 1 deletion.
100 changes: 100 additions & 0 deletions sdks/python/apache_beam/typehints/intrinsic_one_ops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Defines the actions CALL_INTRINSIC_1 intrinsic functions have on the frame.
Each function here corresponds to one entry of the intrinsic table documented at
https://docs.python.org/3/library/dis.html#opcode-CALL_INTRINSIC_1 . The first
argument is a (mutable) FrameState object, the second the integer opcode
argument (the index of the intrinsic in that table).
Bytecodes with more complicated behavior (e.g. modifying the program counter)
are handled inline rather than here.
For internal use only; no backwards-compatibility guarantees.
"""
# pytype: skip-file

from . import opcodes


def intrinsic_1_invalid(state, arg):
  """No-op handler for INTRINSIC_1_INVALID; ``state`` is left untouched."""


def intrinsic_print(state, arg):
  """No-op handler for INTRINSIC_PRINT; ``state`` is left untouched."""


def intrinsic_import_star(state, arg):
  """No-op handler for INTRINSIC_IMPORT_STAR; ``state`` is left untouched."""


def intrinsic_stopiteration_error(state, arg):
  """No-op handler for INTRINSIC_STOPITERATION_ERROR; ``state`` is untouched."""


def intrinsic_async_gen_wrap(state, arg):
  """No-op handler for INTRINSIC_ASYNC_GEN_WRAP; ``state`` is left untouched."""


def intrinsic_unary_positive(state, arg):
  """Delegate INTRINSIC_UNARY_POSITIVE to ``opcodes.unary_positive``.

  Both arguments are forwarded unchanged; nothing is returned.
  """
  opcodes.unary_positive(state, arg)


def intrinsic_list_to_tuple(state, arg):
  """Delegate INTRINSIC_LIST_TO_TUPLE to ``opcodes.list_to_tuple``.

  Both arguments are forwarded unchanged; nothing is returned.
  """
  opcodes.list_to_tuple(state, arg)


def intrinsic_typevar(state, arg):
  """No-op handler for INTRINSIC_TYPEVAR; ``state`` is left untouched."""


def intrinsic_paramspec(state, arg):
  """No-op handler for INTRINSIC_PARAMSPEC; ``state`` is left untouched."""


def intrinsic_typevartuple(state, arg):
  """No-op handler for INTRINSIC_TYPEVARTUPLE; ``state`` is left untouched."""


def intrinsic_subscript_generic(state, arg):
  """No-op handler for INTRINSIC_SUBSCRIPT_GENERIC; ``state`` is untouched."""


def intrinsic_typealias(state, arg):
  """No-op handler for INTRINSIC_TYPEALIAS; ``state`` is left untouched."""


# Handlers are looked up by position, so entry i must be the handler for
# intrinsic i in CPython's table, defined in
# https://docs.python.org/3/library/dis.html#opcode-CALL_INTRINSIC_1
# That table may change between minor versions; keep this tuple in step.
INT_ONE_OPS = (
    intrinsic_1_invalid,
    intrinsic_print,
    intrinsic_import_star,
    intrinsic_stopiteration_error,
    intrinsic_async_gen_wrap,
    intrinsic_unary_positive,
    intrinsic_list_to_tuple,
    intrinsic_typevar,
    intrinsic_paramspec,
    intrinsic_typevartuple,
    intrinsic_subscript_generic,
    intrinsic_typealias,
)
40 changes: 40 additions & 0 deletions sdks/python/apache_beam/typehints/intrinsic_one_ops_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Tests for apache_beam.typehints.intrinsic_one_ops."""

# pytype: skip-file

import dis
import sys
import unittest

from apache_beam.typehints import intrinsic_one_ops


class IntrinsicOneOpsTest(unittest.TestCase):
  """Checks that INT_ONE_OPS stays aligned with CPython's intrinsic table."""
  @unittest.skipUnless(
      sys.version_info >= (3, 12), 'CALL_INTRINSIC_1 was introduced in 3.12')
  def test_unary_intrinsic_ops_are_in_the_same_order_as_in_cpython(self):
    """Handler names, upper-cased, must equal dis._intrinsic_1_descs in order.

    INT_ONE_OPS is indexed by the CALL_INTRINSIC_1 operand, so any reordering
    or insertion on the CPython side has to be mirrored in Beam.
    """
    # Private to dis, hence the __dict__ lookup; copied into a list so
    # assertListEqual accepts it regardless of the container CPython uses.
    dis_order = list(dis.__dict__['_intrinsic_1_descs'])
    # Build the list exactly once. function objects have no `__name_upper`
    # attribute; the name is `__name__`, upper-cased to match CPython's
    # descriptors.
    beam_ops = [fn.__name__.upper() for fn in intrinsic_one_ops.INT_ONE_OPS]
    self.assertListEqual(dis_order, beam_ops)


if __name__ == '__main__':
unittest.main()
65 changes: 65 additions & 0 deletions sdks/python/apache_beam/typehints/opcodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,26 @@ def nop(unused_state, unused_arg):
pass


# resume needs no handling here, so it shares nop's empty implementation.
resume = nop


def pop_top(state, unused_arg):
  """Discard the entry on top of ``state.stack``."""
  stack = state.stack
  stack.pop()


def end_for(state, unused_arg):
  """Discard the top two entries of ``state.stack``."""
  stack = state.stack
  for _ in range(2):
    stack.pop()


def end_send(state, unused_arg):
  """Remove the entry directly beneath the top of ``state.stack``."""
  stack = state.stack
  del stack[-2]


def copy(state, arg):
  """Push a second reference to the ``arg``-th entry from the top.

  ``arg`` counts from 1 at the top of the stack; the original entry stays
  where it is.
  """
  stack = state.stack
  duplicated = stack[-arg]
  stack.append(duplicated)


def rot_n(state, n):
  """Move the top stack entry down to position ``n`` from the top.

  The ``n - 1`` entries that were beneath it each move up by one place.
  """
  stack = state.stack
  top = stack[-1]
  shifted_up = stack[-n:-1]
  stack[-n:] = [top, *shifted_up]

Expand Down Expand Up @@ -188,6 +204,31 @@ def store_subscr(unused_state, unused_args):
pass


def binary_slice(state, args):
  """Replace a slice expression's operands with the type of its result.

  Pops three entries -- the two slice bounds, then the object being sliced --
  and pushes one result type: the sliced type itself when it is ``str`` or an
  IndexableTypeConstraint, otherwise ``element_type`` of it. ``args`` is
  unused.
  """
  stack = state.stack
  stack.pop()  # slice end, irrelevant to the result type
  stack.pop()  # slice start, irrelevant to the result type
  sliced = Const.unwrap(stack.pop())
  keeps_type = sliced is str or isinstance(
      sliced, typehints.IndexableTypeConstraint)
  stack.append(sliced if keeps_type else element_type(sliced))


def store_slice(state, args):
  """Consume a slice assignment's operands, keeping only the container type.

  Four entries are popped (end, start, container, assigned values) and the
  container is pushed back, since the container type is all that matters for
  type checking. ``args`` is unused.

  NOTE(review): CPython's STORE_SLICE leaves nothing on the stack; confirm
  that re-pushing the container here is intentional.
  """
  stack = state.stack
  stack.pop()  # End
  stack.pop()  # Start
  target = stack.pop()  # Container type
  stack.pop()  # Values that would go in container
  stack.append(target)


# print_item only has to drop the printed value from the stack, and
# print_newline touches nothing, so both reuse existing handlers.
print_item = pop_top
print_newline = nop

Expand Down Expand Up @@ -347,6 +388,14 @@ def load_attr(state, arg):
Will replace with Any for builtin methods, but these don't have bytecode in
CPython so that's okay.
"""
if (sys.version_info.major, sys.version_info.minor) >= (3, 12):
# Load attribute's arg was bit-shifted in 3.12 to also allow for
# adding extra information to the stack based on the lower byte,
# so we have to adjust it back.
#
# See https://docs.python.org/3/library/dis.html#opcode-LOAD_ATTR
# for more information.
arg = arg >> 1
o = state.stack.pop()
name = state.get_name(arg)
state.stack.append(_getattr(o, name))
Expand Down Expand Up @@ -417,6 +466,14 @@ def load_fast(state, arg):
state.stack.append(state.vars[arg])


# load_fast_check reuses load_fast unchanged: whatever unbound-variable check
# the real opcode performs is not modelled here.
load_fast_check = load_fast


def load_fast_and_clear(state, arg):
  """Push local ``arg`` onto the stack and mark its slot as cleared.

  Args:
    state: frame state whose ``stack`` and ``vars`` are updated in place.
    arg: index of the local variable.
  """
  state.stack.append(state.vars[arg])
  # Overwrite the slot instead of deleting it. store_fast and delete_fast
  # address state.vars by local index, so removing an entry from a list-backed
  # vars would renumber every later local and make a following
  # store_fast(arg) on the last slot raise IndexError. Any mirrors what
  # delete_fast writes for an unbound local.
  state.vars[arg] = Any


def store_fast(state, arg):
  """Pop the top of the stack into local variable slot ``arg``."""
  value = state.stack.pop()
  state.vars[arg] = value

Expand All @@ -425,6 +482,14 @@ def delete_fast(state, arg):
state.vars[arg] = Any # really an error


def swap(state, arg):
  """Exchange the top stack entry with the ``arg``-th entry from the top."""
  stack = state.stack
  top, other = stack[-1], stack[-arg]
  stack[-arg] = top
  stack[-1] = other


def reraise(state, arg):
  """No-op handler for RERAISE; ``state`` is left untouched."""


# bpo-43683 Adds GEN_START in Python 3.10, but removed in Python 3.11
# https://github.com/python/cpython/pull/25138
def gen_start(state, arg):
Expand Down
30 changes: 29 additions & 1 deletion sdks/python/apache_beam/typehints/trivial_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,7 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
dis.dis(f)
from . import opcodes
simple_ops = dict((k.upper(), v) for k, v in opcodes.__dict__.items())
from . import intrinsic_one_ops

co = f.__code__
code = co.co_code
Expand Down Expand Up @@ -432,6 +433,10 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
elif op in dis.haslocal:
print('(' + co.co_varnames[arg] + ')', end=' ')
elif op in dis.hascompare:
if (sys.version_info.major, sys.version_info.minor) >= (3, 12):
# In 3.12 this arg was bit-shifted. Shifting it back avoids indexing
# past the end of dis.cmp_op.
arg = arg >> 4
print('(' + dis.cmp_op[arg] + ')', end=' ')
elif op in dis.hasfree:
if free is None:
Expand Down Expand Up @@ -578,7 +583,13 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
state = None
elif opname in ('POP_JUMP_IF_TRUE', 'POP_JUMP_IF_FALSE'):
state.stack.pop()
jmp = arg * jump_multiplier
# The arg was changed to be a relative delta instead of an absolute
# in 3.11, and became a full instruction instead of a
# pseudo-instruction in 3.12
if (sys.version_info.major, sys.version_info.minor) >= (3, 12):
jmp = pc + arg * jump_multiplier
else:
jmp = arg * jump_multiplier
jmp_state = state.copy()
elif opname in ('POP_JUMP_FORWARD_IF_TRUE', 'POP_JUMP_FORWARD_IF_FALSE'):
state.stack.pop()
Expand Down Expand Up @@ -608,6 +619,10 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
state.stack.pop()
elif opname == 'FOR_ITER':
jmp = pc + arg * jump_multiplier
if sys.version_info >= (3, 12):
# The jump is relative to the next instruction after a cache call,
# so jump 4 more bytes.
jmp += 4
jmp_state = state.copy()
jmp_state.stack.pop()
state.stack.append(element_type(state.stack[-1]))
Expand Down Expand Up @@ -641,6 +656,19 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
# No-op introduced in 3.11. Without handling this some
# instructions have functionally > 2 byte size.
pass
elif opname == 'RETURN_CONST':
# Introduced in 3.12. Handles returning constants directly
# instead of having a LOAD_CONST before a RETURN_VALUE.
returns.add(state.const_type(arg))
state = None
elif opname == 'CALL_INTRINSIC_1':
# Introduced in 3.12. The arg is an index into a table of
# operations reproduced in INT_ONE_OPS. Not all ops are
# relevant for our type checking infrastructure.
int_op = intrinsic_one_ops.INT_ONE_OPS[arg]
if debug:
print("Executing intrinsic one op", int_op.__name__.upper())
int_op(state, arg)

else:
raise TypeInferenceError('unable to handle %s' % opname)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ def testTupleListComprehension(self):
self.assertReturnType(
typehints.List[int],
lambda xs: [x for x in xs], [typehints.Tuple[int, int, int]])

self.assertReturnType(
typehints.List[typehints.Union[int, float]],
lambda xs: [x for x in xs], [typehints.Tuple[int, float]])
Expand Down

0 comments on commit 287ed38

Please sign in to comment.