From d9f292caf0ec4b885022c49368ae66ab7fcd188f Mon Sep 17 00:00:00 2001 From: AN Long Date: Mon, 19 Jan 2026 00:29:27 +0900 Subject: [PATCH 1/9] Eliminate redundant refcounting for BINARY_OP_EXTEND --- Python/bytecodes.c | 12 +++++++++--- Python/optimizer_bytecodes.c | 6 ++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 9058a5210e50f9..f7551bfbf3c2db 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -829,7 +829,7 @@ dummy_func( DEOPT_IF(!res); } - op(_BINARY_OP_EXTEND, (descr/4, left, right -- res)) { + op(_BINARY_OP_EXTEND, (descr/4, left, right -- res, l, r)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(INLINE_CACHE_ENTRIES_BINARY_OP == 5); @@ -838,12 +838,18 @@ dummy_func( STAT_INC(BINARY_OP, hit); PyObject *res_o = d->action(left_o, right_o); - DECREF_INPUTS(); + if (res_o == NULL) { + ERROR_NO_POP(); + } res = PyStackRef_FromPyObjectSteal(res_o); + l = left; + r = right; + DEAD(left); + DEAD(right); } macro(BINARY_OP_EXTEND) = - unused/1 + _GUARD_BINARY_OP_EXTEND + rewind/-4 + _BINARY_OP_EXTEND; + unused/1 + _GUARD_BINARY_OP_EXTEND + rewind/-4 + _BINARY_OP_EXTEND + POP_TOP + POP_TOP; macro(BINARY_OP_INPLACE_ADD_UNICODE) = _GUARD_TOS_UNICODE + _GUARD_NOS_UNICODE + unused/5 + _BINARY_OP_INPLACE_ADD_UNICODE; diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 876ba7c6de7482..9ad915ea01a54b 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -310,6 +310,12 @@ dummy_func(void) { r = right; } + op(_BINARY_OP_EXTEND, (left, right -- res, l, r)) { + res = sym_new_not_null(ctx); + l = left; + r = right; + } + op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right -- res)) { if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { assert(PyUnicode_CheckExact(sym_get_const(ctx, left))); From 8b9d977a8d2b3372c973896cb348a06f27faa6d3 Mon Sep 17 00:00:00 2001 From: AN Long Date: Mon, 19 Jan 2026 00:31:14 +0900 Subject: [PATCH 2/9] Regen all --- Include/internal/pycore_opcode_metadata.h | 4 +-- Include/internal/pycore_uop_ids.h | 2 +- Include/internal/pycore_uop_metadata.h | 8 ++--- Modules/_testinternalcapi/test_cases.c.h | 39 +++++++++++++++-------- Python/executor_cases.c.h | 28 ++++++++-------- Python/generated_cases.c.h | 39 +++++++++++++++-------- Python/optimizer_cases.c.h | 15 +++++++-- 7 files changed, 86 insertions(+), 49 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index e3f7f5a6f0bb16..80c11b753be7e6 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1094,7 +1094,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG }, [BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG }, [BINARY_OP_ADD_UNICODE] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG }, - [BINARY_OP_EXTEND] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, + [BINARY_OP_EXTEND] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_INPLACE_ADD_UNICODE] = { true, INSTR_FMT_IXC0000, HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG }, [BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG }, @@ -1347,7 +1347,7 @@ _PyOpcode_macro_expansion[256] = { [BINARY_OP_ADD_FLOAT] = { .nuops = 5, .uops = { { _GUARD_TOS_FLOAT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_FLOAT, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_FLOAT, OPARG_SIMPLE, 5 }, { _POP_TOP_FLOAT, OPARG_SIMPLE, 5 }, { _POP_TOP_FLOAT, OPARG_SIMPLE, 5 } } }, [BINARY_OP_ADD_INT] = { .nuops = 5, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_INT, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_INT, OPARG_SIMPLE, 5 }, { _POP_TOP_INT, OPARG_SIMPLE, 5 }, { _POP_TOP_INT, OPARG_SIMPLE, 5 } } }, [BINARY_OP_ADD_UNICODE] = { .nuops = 5, .uops = { { _GUARD_TOS_UNICODE, OPARG_SIMPLE, 0 }, { _GUARD_NOS_UNICODE, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_UNICODE, OPARG_SIMPLE, 5 }, { _POP_TOP_UNICODE, OPARG_SIMPLE, 5 }, { _POP_TOP_UNICODE, OPARG_SIMPLE, 5 } } }, - [BINARY_OP_EXTEND] = { .nuops = 2, .uops = { { _GUARD_BINARY_OP_EXTEND, 4, 1 }, { _BINARY_OP_EXTEND, 4, 1 } } }, + [BINARY_OP_EXTEND] = { .nuops = 4, .uops = { { _GUARD_BINARY_OP_EXTEND, 4, 1 }, { _BINARY_OP_EXTEND, 4, 1 }, { _POP_TOP, OPARG_SIMPLE, 5 }, { _POP_TOP, OPARG_SIMPLE, 5 } } }, [BINARY_OP_INPLACE_ADD_UNICODE] = { .nuops = 3, .uops = { { _GUARD_TOS_UNICODE, OPARG_SIMPLE, 0 }, { _GUARD_NOS_UNICODE, OPARG_SIMPLE, 0 }, { _BINARY_OP_INPLACE_ADD_UNICODE, OPARG_SIMPLE, 5 } } }, [BINARY_OP_MULTIPLY_FLOAT] = { .nuops = 5, .uops = { { _GUARD_TOS_FLOAT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_FLOAT, OPARG_SIMPLE, 0 }, { _BINARY_OP_MULTIPLY_FLOAT, OPARG_SIMPLE, 5 }, { _POP_TOP_FLOAT, OPARG_SIMPLE, 5 }, { _POP_TOP_FLOAT, OPARG_SIMPLE, 5 } } }, [BINARY_OP_MULTIPLY_INT] = { .nuops = 5, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_INT, OPARG_SIMPLE, 0 }, { _BINARY_OP_MULTIPLY_INT, OPARG_SIMPLE, 5 }, { _POP_TOP_INT, OPARG_SIMPLE, 5 }, { _POP_TOP_INT, OPARG_SIMPLE, 5 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 8fd7cef3368e13..d23b447fb518f8 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -379,7 +379,7 @@ extern "C" { #define _BINARY_OP_ADD_UNICODE_r03 576 #define _BINARY_OP_ADD_UNICODE_r13 577 #define _BINARY_OP_ADD_UNICODE_r23 578 -#define _BINARY_OP_EXTEND_r21 579 +#define _BINARY_OP_EXTEND_r23 579 #define _BINARY_OP_INPLACE_ADD_UNICODE_r21 580 #define _BINARY_OP_MULTIPLY_FLOAT_r03 581 #define _BINARY_OP_MULTIPLY_FLOAT_r13 582 diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 7989c2f33662e4..4b8b9fc235d77f 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -115,7 +115,7 @@ const uint32_t _PyUop_Flags[MAX_UOP_ID+1] = { [_BINARY_OP_ADD_UNICODE] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_PURE_FLAG, [_BINARY_OP_INPLACE_ADD_UNICODE] = HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_GUARD_BINARY_OP_EXTEND] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, - [_BINARY_OP_EXTEND] = HAS_ESCAPES_FLAG, + [_BINARY_OP_EXTEND] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_BINARY_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BINARY_OP_SUBSCR_LIST_INT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, @@ -1101,7 +1101,7 @@ const _PyUopCachingInfo _PyUop_Caching[MAX_UOP_ID+1] = { .entries = { { -1, -1, -1 }, { -1, -1, -1 }, - { 1, 2, _BINARY_OP_EXTEND_r21 }, + { 3, 2, _BINARY_OP_EXTEND_r23 }, { -1, -1, -1 }, }, }, @@ -3568,7 +3568,7 @@ const uint16_t _PyUop_Uncached[MAX_UOP_REGS_ID+1] = { [_BINARY_OP_ADD_UNICODE_r23] = _BINARY_OP_ADD_UNICODE, [_BINARY_OP_INPLACE_ADD_UNICODE_r21] = _BINARY_OP_INPLACE_ADD_UNICODE, [_GUARD_BINARY_OP_EXTEND_r22] = _GUARD_BINARY_OP_EXTEND, - [_BINARY_OP_EXTEND_r21] = _BINARY_OP_EXTEND, + [_BINARY_OP_EXTEND_r23] = _BINARY_OP_EXTEND, [_BINARY_SLICE_r31] = _BINARY_SLICE, [_STORE_SLICE_r30] = _STORE_SLICE, [_BINARY_OP_SUBSCR_LIST_INT_r23] = _BINARY_OP_SUBSCR_LIST_INT, @@ -4097,7 +4097,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_REGS_ID+1] = { [_BINARY_OP_ADD_UNICODE_r13] = "_BINARY_OP_ADD_UNICODE_r13", [_BINARY_OP_ADD_UNICODE_r23] = "_BINARY_OP_ADD_UNICODE_r23", [_BINARY_OP_EXTEND] = "_BINARY_OP_EXTEND", - [_BINARY_OP_EXTEND_r21] = "_BINARY_OP_EXTEND_r21", + [_BINARY_OP_EXTEND_r23] = "_BINARY_OP_EXTEND_r23", [_BINARY_OP_INPLACE_ADD_UNICODE] = "_BINARY_OP_INPLACE_ADD_UNICODE", [_BINARY_OP_INPLACE_ADD_UNICODE_r21] = "_BINARY_OP_INPLACE_ADD_UNICODE_r21", [_BINARY_OP_MULTIPLY_FLOAT] = "_BINARY_OP_MULTIPLY_FLOAT", diff --git a/Modules/_testinternalcapi/test_cases.c.h b/Modules/_testinternalcapi/test_cases.c.h index fb584314ef40bc..a7d589dbe7b274 100644 --- a/Modules/_testinternalcapi/test_cases.c.h +++ b/Modules/_testinternalcapi/test_cases.c.h @@ -317,6 +317,9 @@ _PyStackRef left; _PyStackRef right; _PyStackRef res; + _PyStackRef l; + _PyStackRef r; + _PyStackRef value; /* Skip 1 cache entry */ // _GUARD_BINARY_OP_EXTEND { @@ -348,22 +351,32 @@ STAT_INC(BINARY_OP, hit); _PyFrame_SetStackPointer(frame, stack_pointer); PyObject *res_o = d->action(left_o, right_o); - _PyStackRef tmp = right; - right = PyStackRef_NULL; - stack_pointer[-1] = right; - PyStackRef_CLOSE(tmp); - tmp = left; - left = PyStackRef_NULL; - stack_pointer[-2] = left; - PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + if (res_o == NULL) { + JUMP_TO_LABEL(error); + } res = PyStackRef_FromPyObjectSteal(res_o); + l = left; + r = right; + } + // _POP_TOP + { + value = r; + stack_pointer[-2] = res; + stack_pointer[-1] = l; + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); + } + // _POP_TOP + { + value = l; + stack_pointer += -1; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); } - stack_pointer[0] = res; - stack_pointer += 1; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 6469deb238f5b0..56d0b192edba7a 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5143,12 +5143,14 @@ break; } - case _BINARY_OP_EXTEND_r21: { + case _BINARY_OP_EXTEND_r23: { CHECK_CURRENT_CACHED_VALUES(2); assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE()); _PyStackRef right; _PyStackRef left; _PyStackRef res; + _PyStackRef l; + _PyStackRef r; _PyStackRef _stack_item_0 = _tos_cache0; _PyStackRef _stack_item_1 = _tos_cache1; right = _stack_item_1; @@ -5165,22 +5167,20 @@ ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyObject *res_o = d->action(left_o, right_o); - _PyStackRef tmp = right; - right = PyStackRef_NULL; - stack_pointer[-1] = right; - PyStackRef_CLOSE(tmp); - tmp = left; - left = PyStackRef_NULL; - stack_pointer[-2] = left; - PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + if (res_o == NULL) { + SET_CURRENT_CACHED_VALUES(0); + JUMP_TO_ERROR(); + } res = PyStackRef_FromPyObjectSteal(res_o); + l = left; + r = right; + _tos_cache2 = r; + _tos_cache1 = l; _tos_cache0 = res; - _tos_cache1 = PyStackRef_ZERO_BITS; - _tos_cache2 = PyStackRef_ZERO_BITS; - SET_CURRENT_CACHED_VALUES(1); + SET_CURRENT_CACHED_VALUES(3); + stack_pointer += -2; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE()); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index b5ae600c095e67..9df6b2f70f96df 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -317,6 +317,9 @@ _PyStackRef left; _PyStackRef right; _PyStackRef res; + _PyStackRef l; + _PyStackRef r; + _PyStackRef value; /* Skip 1 cache entry */ // _GUARD_BINARY_OP_EXTEND { @@ -348,22 +351,32 @@ STAT_INC(BINARY_OP, hit); _PyFrame_SetStackPointer(frame, stack_pointer); PyObject *res_o = d->action(left_o, right_o); - _PyStackRef tmp = right; - right = PyStackRef_NULL; - stack_pointer[-1] = right; - PyStackRef_CLOSE(tmp); - tmp = left; - left = PyStackRef_NULL; - stack_pointer[-2] = left; - PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + if (res_o == NULL) { + JUMP_TO_LABEL(error); + } res = PyStackRef_FromPyObjectSteal(res_o); + l = left; + r = right; + } + // _POP_TOP + { + value = r; + stack_pointer[-2] = res; + stack_pointer[-1] = l; + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); + } + // _POP_TOP + { + value = l; + stack_pointer += -1; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); } - stack_pointer[0] = res; - stack_pointer += 1; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 012fe16bfd9096..d5b9a4159d7341 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -882,11 +882,22 @@ } case _BINARY_OP_EXTEND: { + JitOptRef right; + JitOptRef left; JitOptRef res; + JitOptRef l; + JitOptRef r; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + PyObject *descr = (PyObject *)this_instr->operand0; res = sym_new_not_null(ctx); - CHECK_STACK_BOUNDS(-1); + l = left; + r = right; + CHECK_STACK_BOUNDS(1); stack_pointer[-2] = res; - stack_pointer += -1; + stack_pointer[-1] = l; + stack_pointer[0] = r; + stack_pointer += 1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } From b3d8e3eee3c484ee8f17d5d5f61f50023888665e Mon Sep 17 00:00:00 2001 From: AN Long Date: Mon, 19 Jan 2026 00:31:24 +0900 Subject: [PATCH 3/9] Add basic test --- Lib/test/test_capi/test_opt.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 79c7f530b8ae89..61d15a718ef646 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2880,6 +2880,25 @@ def testfunc(n): self.assertIn("_POP_TOP_NOP", uops) self.assertLessEqual(count_ops(ex, "_POP_TOP"), 2) + def test_binary_op_extend_float_long_add_refcount_elimination(self): + def testfunc(n): + a = 1.5 + b = 2 + res = 0.0 + for _ in range(n): + res = a + b + return res + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, 3.5) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + for uop in uops: + print(uop) + self.assertIn("_BINARY_OP_EXTEND", uops) + self.assertIn("_POP_TOP_NOP", uops) + self.assertLessEqual(count_ops(ex, "_POP_TOP"), 2) + def test_remove_guard_for_slice_list(self): def f(n): for i in range(n): From acd784a95851fc0c63c1c2f718420a43d6fc6d4a Mon Sep 17 00:00:00 2001 From: AN Long Date: Mon, 19 Jan 2026 01:26:17 +0900 Subject: [PATCH 4/9] Blurb this --- .../2026-01-19-01-26-12.gh-issue-144005.Z3O33m.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-01-19-01-26-12.gh-issue-144005.Z3O33m.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-01-19-01-26-12.gh-issue-144005.Z3O33m.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-01-19-01-26-12.gh-issue-144005.Z3O33m.rst new file mode 100644 index 00000000000000..b3582197f45dda --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-01-19-01-26-12.gh-issue-144005.Z3O33m.rst @@ -0,0 +1 @@ +Eliminate redundant refcounting from ``BINARY_OP_EXTEND``. From 3884cc2dde7a87f7e3a37ee8196f65d6d87497fd Mon Sep 17 00:00:00 2001 From: AN Long Date: Mon, 19 Jan 2026 01:52:08 +0900 Subject: [PATCH 5/9] Eliminate redundant refcounting in the JIT for BINARY_OP --- Python/bytecodes.c | 9 ++++++--- Python/optimizer_bytecodes.c | 4 +++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index f7551bfbf3c2db..a84a2c276e175f 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -5119,7 +5119,7 @@ dummy_func( assert(oparg <= NB_OPARG_LAST); } - op(_BINARY_OP, (lhs, rhs -- res)) { + op(_BINARY_OP, (lhs, rhs -- res, l, r)) { PyObject *lhs_o = PyStackRef_AsPyObjectBorrow(lhs); PyObject *rhs_o = PyStackRef_AsPyObjectBorrow(rhs); @@ -5129,10 +5129,13 @@ dummy_func( ERROR_NO_POP(); } res = PyStackRef_FromPyObjectSteal(res_o); - DECREF_INPUTS(); + l = lhs; + r = rhs; + DEAD(lhs); + DEAD(rhs); } - macro(BINARY_OP) = _SPECIALIZE_BINARY_OP + unused/4 + _BINARY_OP; + macro(BINARY_OP) = _SPECIALIZE_BINARY_OP + unused/4 + _BINARY_OP + POP_TOP + POP_TOP; pure replicate(2:4) inst(SWAP, (bottom, unused[oparg-2], top -- bottom, unused[oparg-2], top)) { diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 9ad915ea01a54b..16eb3a8e82b6b4 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -210,8 +210,10 @@ dummy_func(void) { sym_set_type(left, &PyFloat_Type); } - op(_BINARY_OP, (lhs, rhs -- res)) { + op(_BINARY_OP, (lhs, rhs -- res, l, r)) { REPLACE_OPCODE_IF_EVALUATES_PURE(lhs, rhs, res); + l = lhs; + r = rhs; bool lhs_int = sym_matches_type(lhs, &PyLong_Type); bool rhs_int = sym_matches_type(rhs, &PyLong_Type); bool lhs_float = sym_matches_type(lhs, &PyFloat_Type); From bc9d108e259bcbc6c36b654e29b29664c3837a1e Mon Sep 17 00:00:00 2001 From: AN Long Date: Mon, 19 Jan 2026 01:53:44 +0900 Subject: [PATCH 6/9] Generate all cases --- Include/internal/pycore_opcode_metadata.h | 2 +- Include/internal/pycore_uop_ids.h | 2 +- Include/internal/pycore_uop_metadata.h | 6 ++--- Modules/_testinternalcapi/test_cases.c.h | 27 ++++++++++++++++------- Python/executor_cases.c.h | 26 ++++++++-------------- Python/generated_cases.c.h | 27 ++++++++++++++++------- Python/optimizer_cases.c.h | 26 +++++++++++++++++----- 7 files changed, 72 insertions(+), 44 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 80c11b753be7e6..95dee5d352670d 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1343,7 +1343,7 @@ extern const struct opcode_macro_expansion _PyOpcode_macro_expansion[256]; #ifdef NEED_OPCODE_METADATA const struct opcode_macro_expansion _PyOpcode_macro_expansion[256] = { - [BINARY_OP] = { .nuops = 1, .uops = { { _BINARY_OP, OPARG_SIMPLE, 4 } } }, + [BINARY_OP] = { .nuops = 3, .uops = { { _BINARY_OP, OPARG_SIMPLE, 4 }, { _POP_TOP, OPARG_SIMPLE, 4 }, { _POP_TOP, OPARG_SIMPLE, 4 } } }, [BINARY_OP_ADD_FLOAT] = { .nuops = 5, .uops = { { _GUARD_TOS_FLOAT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_FLOAT, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_FLOAT, OPARG_SIMPLE, 5 }, { _POP_TOP_FLOAT, OPARG_SIMPLE, 5 }, { _POP_TOP_FLOAT, OPARG_SIMPLE, 5 } } }, [BINARY_OP_ADD_INT] = { .nuops = 5, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_INT, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_INT, OPARG_SIMPLE, 5 }, { _POP_TOP_INT, OPARG_SIMPLE, 5 }, { _POP_TOP_INT, OPARG_SIMPLE, 5 } } }, [BINARY_OP_ADD_UNICODE] = { .nuops = 5, .uops = { { _GUARD_TOS_UNICODE, OPARG_SIMPLE, 0 }, { _GUARD_NOS_UNICODE, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_UNICODE, OPARG_SIMPLE, 5 }, { _POP_TOP_UNICODE, OPARG_SIMPLE, 5 }, { _POP_TOP_UNICODE, OPARG_SIMPLE, 5 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index d23b447fb518f8..459535a7e6a74b 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -369,7 +369,7 @@ extern "C" { #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE #define MAX_UOP_ID 568 -#define _BINARY_OP_r21 569 +#define _BINARY_OP_r23 569 #define _BINARY_OP_ADD_FLOAT_r03 570 #define _BINARY_OP_ADD_FLOAT_r13 571 #define _BINARY_OP_ADD_FLOAT_r23 572 diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 4b8b9fc235d77f..44795aed9f418d 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -2919,7 +2919,7 @@ const _PyUopCachingInfo _PyUop_Caching[MAX_UOP_ID+1] = { .entries = { { -1, -1, -1 }, { -1, -1, -1 }, - { 1, 2, _BINARY_OP_r21 }, + { 3, 2, _BINARY_OP_r23 }, { -1, -1, -1 }, }, }, @@ -3929,7 +3929,7 @@ const uint16_t _PyUop_Uncached[MAX_UOP_REGS_ID+1] = { [_COPY_3_r23] = _COPY_3, [_COPY_3_r33] = _COPY_3, [_COPY_r01] = _COPY, - [_BINARY_OP_r21] = _BINARY_OP, + [_BINARY_OP_r23] = _BINARY_OP, [_SWAP_2_r02] = _SWAP_2, [_SWAP_2_r12] = _SWAP_2, [_SWAP_2_r22] = _SWAP_2, @@ -4083,7 +4083,7 @@ const uint16_t _PyUop_SpillsAndReloads[4][4] = { const char *const _PyOpcode_uop_name[MAX_UOP_REGS_ID+1] = { [_BINARY_OP] = "_BINARY_OP", - [_BINARY_OP_r21] = "_BINARY_OP_r21", + [_BINARY_OP_r23] = "_BINARY_OP_r23", [_BINARY_OP_ADD_FLOAT] = "_BINARY_OP_ADD_FLOAT", [_BINARY_OP_ADD_FLOAT_r03] = "_BINARY_OP_ADD_FLOAT_r03", [_BINARY_OP_ADD_FLOAT_r13] = "_BINARY_OP_ADD_FLOAT_r13", diff --git a/Modules/_testinternalcapi/test_cases.c.h b/Modules/_testinternalcapi/test_cases.c.h index a7d589dbe7b274..3c9eb9194366e8 100644 --- a/Modules/_testinternalcapi/test_cases.c.h +++ b/Modules/_testinternalcapi/test_cases.c.h @@ -32,6 +32,9 @@ _PyStackRef lhs; _PyStackRef rhs; _PyStackRef res; + _PyStackRef l; + _PyStackRef r; + _PyStackRef value; // _SPECIALIZE_BINARY_OP { rhs = stack_pointer[-1]; @@ -65,18 +68,26 @@ JUMP_TO_LABEL(error); } res = PyStackRef_FromPyObjectSteal(res_o); + l = lhs; + r = rhs; + } + // _POP_TOP + { + value = r; + stack_pointer[-2] = res; + stack_pointer[-1] = l; _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp = lhs; - lhs = res; - stack_pointer[-2] = lhs; - PyStackRef_CLOSE(tmp); - tmp = rhs; - rhs = PyStackRef_NULL; - stack_pointer[-1] = rhs; - PyStackRef_CLOSE(tmp); + PyStackRef_XCLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); + } + // _POP_TOP + { + value = l; stack_pointer += -1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); } DISPATCH(); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 56d0b192edba7a..2bf24334d8eb1a 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -16594,12 +16594,14 @@ break; } - case _BINARY_OP_r21: { + case _BINARY_OP_r23: { CHECK_CURRENT_CACHED_VALUES(2); assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE()); _PyStackRef rhs; _PyStackRef lhs; _PyStackRef res; + _PyStackRef l; + _PyStackRef r; _PyStackRef _stack_item_0 = _tos_cache0; _PyStackRef _stack_item_1 = _tos_cache1; oparg = CURRENT_OPARG(); @@ -16620,23 +16622,13 @@ JUMP_TO_ERROR(); } res = PyStackRef_FromPyObjectSteal(res_o); - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp = lhs; - lhs = res; - stack_pointer[-2] = lhs; - PyStackRef_CLOSE(tmp); - tmp = rhs; - rhs = PyStackRef_NULL; - stack_pointer[-1] = rhs; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -1; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + l = lhs; + r = rhs; + _tos_cache2 = r; + _tos_cache1 = l; _tos_cache0 = res; - _tos_cache1 = PyStackRef_ZERO_BITS; - _tos_cache2 = PyStackRef_ZERO_BITS; - SET_CURRENT_CACHED_VALUES(1); - stack_pointer += -1; + SET_CURRENT_CACHED_VALUES(3); + stack_pointer += -2; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE()); break; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 9df6b2f70f96df..ae9e1e5421b87b 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -32,6 +32,9 @@ _PyStackRef lhs; _PyStackRef rhs; _PyStackRef res; + _PyStackRef l; + _PyStackRef r; + _PyStackRef value; // _SPECIALIZE_BINARY_OP { rhs = stack_pointer[-1]; @@ -65,18 +68,26 @@ JUMP_TO_LABEL(error); } res = PyStackRef_FromPyObjectSteal(res_o); + l = lhs; + r = rhs; + } + // _POP_TOP + { + value = r; + stack_pointer[-2] = res; + stack_pointer[-1] = l; _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp = lhs; - lhs = res; - stack_pointer[-2] = lhs; - PyStackRef_CLOSE(tmp); - tmp = rhs; - rhs = PyStackRef_NULL; - stack_pointer[-1] = rhs; - PyStackRef_CLOSE(tmp); + PyStackRef_XCLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); + } + // _POP_TOP + { + value = l; stack_pointer += -1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); } DISPATCH(); } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index d5b9a4159d7341..c8990a6d07a5cc 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -3616,6 +3616,8 @@ JitOptRef rhs; JitOptRef lhs; JitOptRef res; + JitOptRef l; + JitOptRef r; rhs = stack_pointer[-1]; lhs = stack_pointer[-2]; if ( @@ -3627,6 +3629,8 @@ _PyStackRef lhs = sym_get_const_as_stackref(ctx, lhs_sym); _PyStackRef rhs = sym_get_const_as_stackref(ctx, rhs_sym); _PyStackRef res_stackref; + _PyStackRef l_stackref; + _PyStackRef r_stackref; /* Start of uop copied from bytecodes for constant evaluation */ PyObject *lhs_o = PyStackRef_AsPyObjectBorrow(lhs); PyObject *rhs_o = PyStackRef_AsPyObjectBorrow(rhs); @@ -3636,21 +3640,29 @@ JUMP_TO_LABEL(error); } res_stackref = PyStackRef_FromPyObjectSteal(res_o); + l_stackref = lhs; + r_stackref = rhs; /* End of uop copied from bytecodes for constant evaluation */ + (void)l_stackref; + (void)r_stackref; res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); if (sym_is_const(ctx, res)) { PyObject *result = sym_get_const(ctx, res); if (_Py_IsImmortal(result)) { - // Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result - ADD_OP(_POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); + // Replace with _INSERT_2_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result + ADD_OP(_INSERT_2_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); } } - CHECK_STACK_BOUNDS(-1); + CHECK_STACK_BOUNDS(1); stack_pointer[-2] = res; - stack_pointer += -1; + stack_pointer[-1] = l; + stack_pointer[0] = r; + stack_pointer += 1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } + l = lhs; + r = rhs; bool lhs_int = sym_matches_type(lhs, &PyLong_Type); bool rhs_int = sym_matches_type(rhs, &PyLong_Type); bool lhs_float = sym_matches_type(lhs, &PyFloat_Type); @@ -3684,9 +3696,11 @@ else { res = sym_new_type(ctx, &PyFloat_Type); } - CHECK_STACK_BOUNDS(-1); + CHECK_STACK_BOUNDS(1); stack_pointer[-2] = res; - stack_pointer += -1; + stack_pointer[-1] = l; + stack_pointer[0] = r; + stack_pointer += 1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } From 8e1bcaa84cbdd4ac446b11453ddedadb34ef11f2 Mon Sep 17 00:00:00 2001 From: AN Long Date: Mon, 19 Jan 2026 01:53:57 +0900 Subject: [PATCH 7/9] Add test --- Lib/test/test_capi/test_opt.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 61d15a718ef646..781ce6b3a061cf 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2899,6 +2899,31 @@ def testfunc(n): self.assertIn("_POP_TOP_NOP", uops) self.assertLessEqual(count_ops(ex, "_POP_TOP"), 2) + def test_binary_op_refcount_elimination(self): + class CustomAdder: + def __init__(self, val): + self.val = val + def __add__(self, other): + return CustomAdder(self.val + other.val) + + def testfunc(n): + a = CustomAdder(1) + b = CustomAdder(2) + res = None + for _ in range(n): + res = a + b + return res.val if res else 0 + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, 3) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + for uop in uops: + print(uop) + self.assertIn("_BINARY_OP", uops) + self.assertIn("_POP_TOP_NOP", uops) + self.assertLessEqual(count_ops(ex, "_POP_TOP"), 2) + def test_remove_guard_for_slice_list(self): def f(n): for i in range(n): From e1ccc3a9d682ad6311520a244ef13d64b505af30 Mon Sep 17 00:00:00 2001 From: AN Long Date: Mon, 19 Jan 2026 03:06:32 +0900 Subject: [PATCH 8/9] Remove the unrelated changes --- Lib/test/test_capi/test_opt.py | 27 --------------------------- Python/bytecodes.c | 9 +++------ Python/optimizer_bytecodes.c | 4 +--- 3 files changed, 4 insertions(+), 36 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 781ce6b3a061cf..00dbbb65776058 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2893,37 +2893,10 @@ def testfunc(n): self.assertEqual(res, 3.5) self.assertIsNotNone(ex) uops = get_opnames(ex) - for uop in uops: - print(uop) self.assertIn("_BINARY_OP_EXTEND", uops) self.assertIn("_POP_TOP_NOP", uops) self.assertLessEqual(count_ops(ex, "_POP_TOP"), 2) - def test_binary_op_refcount_elimination(self): - class CustomAdder: - def __init__(self, val): - self.val = val - def __add__(self, other): - return CustomAdder(self.val + other.val) - - def testfunc(n): - a = CustomAdder(1) - b = CustomAdder(2) - res = None - for _ in range(n): - res = a + b - return res.val if res else 0 - - res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) - self.assertEqual(res, 3) - self.assertIsNotNone(ex) - uops = get_opnames(ex) - for uop in uops: - print(uop) - self.assertIn("_BINARY_OP", uops) - self.assertIn("_POP_TOP_NOP", uops) - self.assertLessEqual(count_ops(ex, "_POP_TOP"), 2) - def test_remove_guard_for_slice_list(self): def f(n): for i in range(n): diff --git a/Python/bytecodes.c b/Python/bytecodes.c index a84a2c276e175f..f7551bfbf3c2db 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -5119,7 +5119,7 @@ dummy_func( assert(oparg <= NB_OPARG_LAST); } - op(_BINARY_OP, (lhs, rhs -- res, l, r)) { + op(_BINARY_OP, (lhs, rhs -- res)) { PyObject *lhs_o = PyStackRef_AsPyObjectBorrow(lhs); PyObject *rhs_o = PyStackRef_AsPyObjectBorrow(rhs); @@ -5129,13 +5129,10 @@ dummy_func( ERROR_NO_POP(); } res = PyStackRef_FromPyObjectSteal(res_o); - l = lhs; - r = rhs; - DEAD(lhs); - DEAD(rhs); + DECREF_INPUTS(); } - macro(BINARY_OP) = _SPECIALIZE_BINARY_OP + unused/4 + _BINARY_OP + POP_TOP + POP_TOP; + macro(BINARY_OP) = _SPECIALIZE_BINARY_OP + unused/4 + _BINARY_OP; pure replicate(2:4) inst(SWAP, (bottom, unused[oparg-2], top -- bottom, unused[oparg-2], top)) { diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 16eb3a8e82b6b4..9ad915ea01a54b 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -210,10 +210,8 @@ dummy_func(void) { sym_set_type(left, &PyFloat_Type); } - op(_BINARY_OP, (lhs, rhs -- res, l, r)) { + op(_BINARY_OP, (lhs, rhs -- res)) { REPLACE_OPCODE_IF_EVALUATES_PURE(lhs, rhs, res); - l = lhs; - r = rhs; bool lhs_int = sym_matches_type(lhs, &PyLong_Type); bool rhs_int = sym_matches_type(rhs, &PyLong_Type); bool lhs_float = sym_matches_type(lhs, &PyFloat_Type); From 444c8d8eed321abd5c40277b6aa2341c598e0dd2 Mon Sep 17 00:00:00 2001 From: AN Long Date: Mon, 19 Jan 2026 03:15:18 +0900 Subject: [PATCH 9/9] regen all --- Include/internal/pycore_opcode_metadata.h | 2 +- Include/internal/pycore_uop_ids.h | 2 +- Include/internal/pycore_uop_metadata.h | 6 ++--- Modules/_testinternalcapi/test_cases.c.h | 27 +++++++---------------- Python/executor_cases.c.h | 26 ++++++++++++++-------- Python/generated_cases.c.h | 27 +++++++---------------- Python/optimizer_cases.c.h | 26 +++++----------------- 7 files changed, 44 insertions(+), 72 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 95dee5d352670d..80c11b753be7e6 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1343,7 +1343,7 @@ extern const struct opcode_macro_expansion _PyOpcode_macro_expansion[256]; #ifdef NEED_OPCODE_METADATA const struct opcode_macro_expansion _PyOpcode_macro_expansion[256] = { - [BINARY_OP] = { .nuops = 3, .uops = { { _BINARY_OP, OPARG_SIMPLE, 4 }, { _POP_TOP, OPARG_SIMPLE, 4 }, { _POP_TOP, OPARG_SIMPLE, 4 } } }, + [BINARY_OP] = { .nuops = 1, .uops = { { _BINARY_OP, OPARG_SIMPLE, 4 } } }, [BINARY_OP_ADD_FLOAT] = { .nuops = 5, .uops = { { _GUARD_TOS_FLOAT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_FLOAT, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_FLOAT, OPARG_SIMPLE, 5 }, { _POP_TOP_FLOAT, OPARG_SIMPLE, 5 }, { _POP_TOP_FLOAT, OPARG_SIMPLE, 5 } } }, [BINARY_OP_ADD_INT] = { .nuops = 5, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_INT, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_INT, OPARG_SIMPLE, 5 }, { _POP_TOP_INT, OPARG_SIMPLE, 5 }, { _POP_TOP_INT, OPARG_SIMPLE, 5 } } }, [BINARY_OP_ADD_UNICODE] = { .nuops = 5, .uops = { { _GUARD_TOS_UNICODE, OPARG_SIMPLE, 0 }, { _GUARD_NOS_UNICODE, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_UNICODE, OPARG_SIMPLE, 5 }, { _POP_TOP_UNICODE, OPARG_SIMPLE, 5 }, { _POP_TOP_UNICODE, OPARG_SIMPLE, 5 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 459535a7e6a74b..d23b447fb518f8 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -369,7 +369,7 @@ extern "C" { #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE #define MAX_UOP_ID 568 -#define _BINARY_OP_r23 569 +#define _BINARY_OP_r21 569 #define _BINARY_OP_ADD_FLOAT_r03 570 #define _BINARY_OP_ADD_FLOAT_r13 571 #define _BINARY_OP_ADD_FLOAT_r23 572 diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 44795aed9f418d..4b8b9fc235d77f 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -2919,7 +2919,7 @@ const _PyUopCachingInfo _PyUop_Caching[MAX_UOP_ID+1] = { .entries = { { -1, -1, -1 }, { -1, -1, -1 }, - { 3, 2, _BINARY_OP_r23 }, + { 1, 2, _BINARY_OP_r21 }, { -1, -1, -1 }, }, }, @@ -3929,7 +3929,7 @@ const uint16_t _PyUop_Uncached[MAX_UOP_REGS_ID+1] = { [_COPY_3_r23] = _COPY_3, [_COPY_3_r33] = _COPY_3, [_COPY_r01] = _COPY, - [_BINARY_OP_r23] = _BINARY_OP, + [_BINARY_OP_r21] = _BINARY_OP, [_SWAP_2_r02] = _SWAP_2, [_SWAP_2_r12] = _SWAP_2, [_SWAP_2_r22] = _SWAP_2, @@ -4083,7 +4083,7 @@ const uint16_t _PyUop_SpillsAndReloads[4][4] = { const char *const _PyOpcode_uop_name[MAX_UOP_REGS_ID+1] = { [_BINARY_OP] = "_BINARY_OP", - [_BINARY_OP_r23] = "_BINARY_OP_r23", + [_BINARY_OP_r21] = "_BINARY_OP_r21", [_BINARY_OP_ADD_FLOAT] = "_BINARY_OP_ADD_FLOAT", [_BINARY_OP_ADD_FLOAT_r03] = "_BINARY_OP_ADD_FLOAT_r03", [_BINARY_OP_ADD_FLOAT_r13] = "_BINARY_OP_ADD_FLOAT_r13", diff --git a/Modules/_testinternalcapi/test_cases.c.h b/Modules/_testinternalcapi/test_cases.c.h index 3c9eb9194366e8..a7d589dbe7b274 100644 --- a/Modules/_testinternalcapi/test_cases.c.h +++ b/Modules/_testinternalcapi/test_cases.c.h @@ -32,9 +32,6 @@ _PyStackRef lhs; _PyStackRef rhs; _PyStackRef res; - _PyStackRef l; - _PyStackRef r; - _PyStackRef value; // _SPECIALIZE_BINARY_OP { rhs = stack_pointer[-1]; @@ -68,26 +65,18 @@ JUMP_TO_LABEL(error); } res = PyStackRef_FromPyObjectSteal(res_o); - l = lhs; - r = rhs; - } - // _POP_TOP - { - value = r; - stack_pointer[-2] = res; - stack_pointer[-1] = l; _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_XCLOSE(value); + _PyStackRef tmp = lhs; + lhs = res; + stack_pointer[-2] = lhs; + PyStackRef_CLOSE(tmp); + tmp = rhs; + rhs = PyStackRef_NULL; + stack_pointer[-1] = rhs; + PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); - } - // _POP_TOP - { - value = l; stack_pointer += -1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_XCLOSE(value); - stack_pointer = _PyFrame_GetStackPointer(frame); } DISPATCH(); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 2bf24334d8eb1a..56d0b192edba7a 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -16594,14 +16594,12 @@ break; } - case _BINARY_OP_r23: { + case _BINARY_OP_r21: { CHECK_CURRENT_CACHED_VALUES(2); assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE()); _PyStackRef rhs; _PyStackRef lhs; _PyStackRef res; - _PyStackRef l; - _PyStackRef r; _PyStackRef _stack_item_0 = _tos_cache0; _PyStackRef _stack_item_1 = _tos_cache1; oparg = CURRENT_OPARG(); @@ -16622,13 +16620,23 @@ JUMP_TO_ERROR(); } res = PyStackRef_FromPyObjectSteal(res_o); - l = lhs; - r = rhs; - _tos_cache2 = r; - _tos_cache1 = l; + _PyFrame_SetStackPointer(frame, stack_pointer); + _PyStackRef tmp = lhs; + lhs = res; + stack_pointer[-2] = lhs; + PyStackRef_CLOSE(tmp); + tmp = rhs; + rhs = PyStackRef_NULL; + stack_pointer[-1] = rhs; + PyStackRef_CLOSE(tmp); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -1; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _tos_cache0 = res; - SET_CURRENT_CACHED_VALUES(3); - stack_pointer += -2; + _tos_cache1 = PyStackRef_ZERO_BITS; + _tos_cache2 = PyStackRef_ZERO_BITS; + SET_CURRENT_CACHED_VALUES(1); + stack_pointer += -1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE()); break; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index ae9e1e5421b87b..9df6b2f70f96df 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -32,9 +32,6 @@ _PyStackRef lhs; _PyStackRef rhs; _PyStackRef res; - _PyStackRef l; - _PyStackRef r; - _PyStackRef value; // _SPECIALIZE_BINARY_OP { rhs = stack_pointer[-1]; @@ -68,26 +65,18 @@ JUMP_TO_LABEL(error); } res = PyStackRef_FromPyObjectSteal(res_o); - l = lhs; - r = rhs; - } - // _POP_TOP - { - value = r; - stack_pointer[-2] = res; - stack_pointer[-1] = l; _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_XCLOSE(value); + _PyStackRef tmp = lhs; + lhs = res; + stack_pointer[-2] = lhs; + PyStackRef_CLOSE(tmp); + tmp = rhs; + rhs = PyStackRef_NULL; + stack_pointer[-1] = rhs; + PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); - } - // _POP_TOP - { - value = l; stack_pointer += -1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_XCLOSE(value); - stack_pointer = _PyFrame_GetStackPointer(frame); } DISPATCH(); } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index c8990a6d07a5cc..d5b9a4159d7341 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -3616,8 +3616,6 @@ JitOptRef rhs; JitOptRef lhs; JitOptRef res; - JitOptRef l; - JitOptRef r; rhs = stack_pointer[-1]; lhs = stack_pointer[-2]; if ( @@ -3629,8 +3627,6 @@ _PyStackRef lhs = sym_get_const_as_stackref(ctx, lhs_sym); _PyStackRef rhs = sym_get_const_as_stackref(ctx, rhs_sym); _PyStackRef res_stackref; - _PyStackRef l_stackref; - _PyStackRef r_stackref; /* Start of uop copied from bytecodes for constant evaluation */ PyObject *lhs_o = PyStackRef_AsPyObjectBorrow(lhs); PyObject *rhs_o = PyStackRef_AsPyObjectBorrow(rhs); @@ -3640,29 +3636,21 @@ JUMP_TO_LABEL(error); } res_stackref = PyStackRef_FromPyObjectSteal(res_o); - l_stackref = lhs; - r_stackref = rhs; /* End of uop copied from bytecodes for constant evaluation */ - (void)l_stackref; - (void)r_stackref; res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); if (sym_is_const(ctx, res)) { PyObject *result = sym_get_const(ctx, res); if (_Py_IsImmortal(result)) { - // Replace with _INSERT_2_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result - ADD_OP(_INSERT_2_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); + // Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result + ADD_OP(_POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); } } - CHECK_STACK_BOUNDS(1); + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; - stack_pointer[-1] = l; - stack_pointer[0] = r; - stack_pointer += 1; + stack_pointer += -1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } - l = lhs; - r = rhs; bool lhs_int = sym_matches_type(lhs, &PyLong_Type); bool rhs_int = sym_matches_type(rhs, &PyLong_Type); bool lhs_float = sym_matches_type(lhs, &PyFloat_Type); @@ -3696,11 +3684,9 @@ else { res = sym_new_type(ctx, &PyFloat_Type); } - CHECK_STACK_BOUNDS(1); + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; - stack_pointer[-1] = l; - stack_pointer[0] = r; - stack_pointer += 1; + stack_pointer += -1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; }