diff --git a/Doc/c-api/conversion.rst b/Doc/c-api/conversion.rst
index 96078d22710527..f7c8ef8b22b955 100644
--- a/Doc/c-api/conversion.rst
+++ b/Doc/c-api/conversion.rst
@@ -130,6 +130,8 @@ The following functions provide locale-independent string to number conversions.
 
    *flags* can be zero or more of the following values or-ed together:
 
+   .. c:namespace:: NULL
+
    .. c:macro:: Py_DTSF_SIGN
 
       Always precede the returned string with a sign
@@ -151,9 +153,21 @@ The following functions provide locale-independent string to number conversions.
 
    .. versionadded:: 3.11
 
-   If *ptype* is non-``NULL``, then the value it points to will be set to one of
-   ``Py_DTST_FINITE``, ``Py_DTST_INFINITE``, or ``Py_DTST_NAN``, signifying that
-   *val* is a finite number, an infinite number, or not a number, respectively.
+   If *ptype* is non-``NULL``, then the value it points to will be set to one
+   of the following constants depending on the type of *val*:
+
+   .. list-table::
+      :header-rows: 1
+      :align: left
+
+      * - *\*ptype*
+        - type of *val*
+      * - .. c:macro:: Py_DTST_FINITE
+        - finite number
+      * - .. c:macro:: Py_DTST_INFINITE
+        - infinite number
+      * - .. c:macro:: Py_DTST_NAN
+        - not a number
 
    The return value is a pointer to *buffer* with the converted string or ``NULL``
    if the conversion failed. The caller is responsible for freeing the
diff --git a/Doc/howto/remote_debugging.rst b/Doc/howto/remote_debugging.rst
index 78b40bcdf7127b..dfe0176b75a020 100644
--- a/Doc/howto/remote_debugging.rst
+++ b/Doc/howto/remote_debugging.rst
@@ -8,6 +8,16 @@ execute Python code remotely.
 Most platforms require elevated privileges to attach to another Python
 process.
 
+Disabling remote debugging
+--------------------------
+
+To disable remote debugging support, use any of the following:
+
+* Set the :envvar:`PYTHON_DISABLE_REMOTE_DEBUG` environment variable to ``1`` before
+  starting the interpreter.
+* Use the :option:`-X disable_remote_debug` command-line option.
+* Compile Python with the :option:`--without-remote-debug` build flag.
+
 .. _permission-requirements:
 
 Permission requirements
@@ -614,4 +624,3 @@ To inject and execute a Python script in a remote process:
 6. Set ``_PY_EVAL_PLEASE_STOP_BIT`` in the ``eval_breaker`` field.
 7. Resume the process (if suspended). The script will execute at the
    next safe evaluation point.
-
diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst
index a0621d4b0dbd09..f977f1389b61a5 100644
--- a/Doc/library/sys.rst
+++ b/Doc/library/sys.rst
@@ -1997,6 +1997,9 @@ always available. Unless explicitly noted otherwise, all variables are read-only
       interpreter is pre-release (alpha, beta, or release candidate) then the
       local and remote interpreters must be the same exact version.
 
+   See :ref:`remote-debugging` for more information about the remote debugging
+   mechanism.
+
    .. audit-event:: sys.remote_exec pid script_path
 
       When the code is executed in the remote process, an
@@ -2015,6 +2018,7 @@ always available. Unless explicitly noted otherwise, all variables are read-only
    .. availability:: Unix, Windows.
 
    .. versionadded:: 3.14
+      See :pep:`768` for more details.
 
 .. function:: _enablelegacywindowsfsencoding()
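
[Editorial aside, not part of the patch: a minimal sketch of how the *ptype* out-parameter documented in the table above is typically consumed. The helper name `classify` is hypothetical; `PyOS_double_to_string` and the `Py_DTST_*`/`Py_DTSF_*` constants are the real C API.]

/* Illustrative sketch only (not part of the patch). */
#include <Python.h>
#include <stdio.h>

static void
classify(double val)
{
    int ptype;
    /* 'r' (repr) format requires precision 0; Py_DTSF_SIGN forces a sign. */
    char *s = PyOS_double_to_string(val, 'r', 0, Py_DTSF_SIGN, &ptype);
    if (s == NULL) {
        return;  /* conversion failed; an exception is set */
    }
    printf("%s is %s\n", s,
           ptype == Py_DTST_FINITE ? "a finite number" :
           ptype == Py_DTST_INFINITE ? "an infinite number" : "not a number");
    PyMem_Free(s);  /* the caller owns the returned buffer */
}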
diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h
index 0592221f15226e..2ee518fb82f301 100644
--- a/Include/internal/pycore_optimizer.h
+++ b/Include/internal/pycore_optimizer.h
@@ -16,12 +16,102 @@ extern "C" {
 
 #include
 
+typedef struct _PyJitUopBuffer {
+    _PyUOpInstruction *start;
+    _PyUOpInstruction *next;
+    _PyUOpInstruction *end;
+} _PyJitUopBuffer;
+
+
+typedef struct _JitOptContext {
+    char done;
+    char out_of_space;
+    bool contradiction;
+    // Has the builtins dict been watched?
+    bool builtins_watched;
+    // The current "executing" frame.
+    _Py_UOpsAbstractFrame *frame;
+    _Py_UOpsAbstractFrame frames[MAX_ABSTRACT_FRAME_DEPTH];
+    int curr_frame_depth;
+
+    // Arena for the symbolic types.
+    ty_arena t_arena;
+
+    JitOptRef *n_consumed;
+    JitOptRef *limit;
+    JitOptRef locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];
+    _PyJitUopBuffer out_buffer;
+} JitOptContext;
+
+
+static inline void
+uop_buffer_init(_PyJitUopBuffer *trace, _PyUOpInstruction *start, uint32_t size)
+{
+    trace->next = trace->start = start;
+    trace->end = start + size;
+}
+
+static inline _PyUOpInstruction *
+uop_buffer_last(_PyJitUopBuffer *trace)
+{
+    assert(trace->next > trace->start);
+    return trace->next-1;
+}
+
+static inline int
+uop_buffer_length(_PyJitUopBuffer *trace)
+{
+    return (int)(trace->next - trace->start);
+}
+
+static inline int
+uop_buffer_remaining_space(_PyJitUopBuffer *trace)
+{
+    return (int)(trace->end - trace->next);
+}
+
+typedef struct _PyJitTracerInitialState {
+    int stack_depth;
+    int chain_depth;
+    struct _PyExitData *exit;
+    PyCodeObject *code; // Strong
+    PyFunctionObject *func; // Strong
+    struct _PyExecutorObject *executor; // Strong
+    _Py_CODEUNIT *start_instr;
+    _Py_CODEUNIT *close_loop_instr;
+    _Py_CODEUNIT *jump_backward_instr;
+} _PyJitTracerInitialState;
+
+typedef struct _PyJitTracerPreviousState {
+    bool dependencies_still_valid;
+    int instr_oparg;
+    int instr_stacklevel;
+    _Py_CODEUNIT *instr;
+    PyCodeObject *instr_code; // Strong
+    struct _PyInterpreterFrame *instr_frame;
+    _PyBloomFilter dependencies;
+} _PyJitTracerPreviousState;
+
+typedef struct _PyJitTracerTranslatorState {
+    int jump_backward_seen;
+} _PyJitTracerTranslatorState;
+
+typedef struct _PyJitTracerState {
+    bool is_tracing;
+    _PyJitTracerInitialState initial_state;
+    _PyJitTracerPreviousState prev_state;
+    _PyJitTracerTranslatorState translator_state;
+    JitOptContext opt_context;
+    _PyJitUopBuffer code_buffer;
+    _PyJitUopBuffer out_buffer;
+    _PyUOpInstruction uop_array[2 * UOP_MAX_TRACE_LENGTH];
+} _PyJitTracerState;
+
 typedef struct _PyExecutorLinkListNode {
     struct _PyExecutorObject *next;
     struct _PyExecutorObject *previous;
 } _PyExecutorLinkListNode;
 
-
 typedef struct {
     uint8_t opcode;
     uint8_t oparg;
@@ -86,8 +176,8 @@ PyAPI_FUNC(void) _Py_Executors_InvalidateCold(PyInterpreterState *interp);
 
 int _Py_uop_analyze_and_optimize(
     _PyThreadStateImpl *tstate,
-    _PyUOpInstruction *trace, int trace_len, int curr_stackentries,
-    _PyBloomFilter *dependencies);
+    _PyUOpInstruction *input, int trace_len, int curr_stackentries,
+    _PyUOpInstruction *output, _PyBloomFilter *dependencies);
 
 extern PyTypeObject _PyUOpExecutor_Type;
 
@@ -205,6 +295,8 @@ extern JitOptRef _Py_uop_sym_new_truthiness(JitOptContext *ctx, JitOptRef value,
 extern bool _Py_uop_sym_is_compact_int(JitOptRef sym);
 extern JitOptRef _Py_uop_sym_new_compact_int(JitOptContext *ctx);
 extern void _Py_uop_sym_set_compact_int(JitOptContext *ctx, JitOptRef sym);
+extern JitOptRef _Py_uop_sym_new_predicate(JitOptContext *ctx, JitOptRef lhs_ref, JitOptRef rhs_ref, JitOptPredicateKind kind);
+extern void _Py_uop_sym_apply_predicate_narrowing(JitOptContext *ctx, JitOptRef sym, bool branch_is_true);
 
 extern void _Py_uop_abstractcontext_init(JitOptContext *ctx);
 extern void _Py_uop_abstractcontext_fini(JitOptContext *ctx);
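
[Editorial aside, not part of the patch: the new `_PyJitUopBuffer` replaces the old `(array, length, max_length)` triple with a `start`/`next`/`end` cursor. A standalone sketch of the same idiom, with simplified types — `Inst`/`Buf` and the `buf_*` names are illustrative, not the internal API:]

/* Standalone sketch of the cursor-style buffer introduced above. */
#include <assert.h>
#include <stdint.h>

typedef struct { uint16_t opcode; uint64_t operand0; } Inst;
typedef struct { Inst *start, *next, *end; } Buf;

static void buf_init(Buf *b, Inst *mem, uint32_t size) {
    b->next = b->start = mem;   /* empty: next == start */
    b->end = mem + size;        /* one past the last usable slot */
}
static int buf_length(const Buf *b)    { return (int)(b->next - b->start); }
static int buf_remaining(const Buf *b) { return (int)(b->end - b->next); }
static void buf_push(Buf *b, Inst i) {
    assert(buf_remaining(b) > 0);
    *b->next++ = i;
}

[Reserving room for exit/error stubs is then just shrinking `end` (as the translator below does with `trace->end -= 2`), which keeps every later remaining-space check honest without separate counters.]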
diff --git a/Include/internal/pycore_optimizer_types.h b/Include/internal/pycore_optimizer_types.h
index 6501ce869c1425..a879ca26ce7b63 100644
--- a/Include/internal/pycore_optimizer_types.h
+++ b/Include/internal/pycore_optimizer_types.h
@@ -40,6 +40,7 @@ typedef enum _JitSymType {
     JIT_SYM_TUPLE_TAG = 8,
     JIT_SYM_TRUTHINESS_TAG = 9,
     JIT_SYM_COMPACT_INT = 10,
+    JIT_SYM_PREDICATE_TAG = 11,
 } JitSymType;
 
 typedef struct _jit_opt_known_class {
@@ -72,6 +73,18 @@ typedef struct {
     uint16_t value;
 } JitOptTruthiness;
 
+typedef enum {
+    JIT_PRED_IS,
+    JIT_PRED_IS_NOT,
+} JitOptPredicateKind;
+
+typedef struct {
+    uint8_t tag;
+    uint8_t kind;
+    uint16_t lhs;
+    uint16_t rhs;
+} JitOptPredicate;
+
 typedef struct {
     uint8_t tag;
 } JitOptCompactInt;
@@ -84,6 +97,7 @@ typedef union _jit_opt_symbol {
     JitOptTuple tuple;
     JitOptTruthiness truthiness;
     JitOptCompactInt compact;
+    JitOptPredicate predicate;
 } JitOptSymbol;
 
 // This mimics the _PyStackRef API
@@ -112,27 +126,6 @@ typedef struct ty_arena {
     JitOptSymbol arena[TY_ARENA_SIZE];
 } ty_arena;
 
-typedef struct _JitOptContext {
-    char done;
-    char out_of_space;
-    bool contradiction;
-    // Has the builtins dict been watched?
-    bool builtins_watched;
-    // The current "executing" frame.
-    _Py_UOpsAbstractFrame *frame;
-    _Py_UOpsAbstractFrame frames[MAX_ABSTRACT_FRAME_DEPTH];
-    int curr_frame_depth;
-
-    // Arena for the symbolic types.
-    ty_arena t_arena;
-
-    JitOptRef *n_consumed;
-    JitOptRef *limit;
-    JitOptRef locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];
-    _PyUOpInstruction *out_buffer;
-    int out_len;
-} JitOptContext;
-
 #ifdef __cplusplus
 }
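
[Editorial aside, not part of the patch: `JitOptPredicate` stores its operands as 16-bit arena indices rather than pointers, so the node fits the small symbol union. A hedged sketch of the encode/decode round trip — `Sym`, `encode`, and `decode` are illustrative stand-ins for the real `allocation_base(ctx)` arithmetic used later in optimizer_symbols.c:]

/* Sketch of arena-index encoding as used by JitOptPredicate above. */
#include <stdint.h>

typedef struct { uint8_t tag; } Sym;

static uint16_t encode(Sym *arena_base, Sym *s) {
    return (uint16_t)(s - arena_base);   /* index, not pointer: fits 16 bits */
}
static Sym *decode(Sym *arena_base, uint16_t index) {
    return arena_base + index;           /* inverse of encode() */
}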
diff --git a/Include/internal/pycore_tstate.h b/Include/internal/pycore_tstate.h
index 24a40416c2191b..64b90710b8e664 100644
--- a/Include/internal/pycore_tstate.h
+++ b/Include/internal/pycore_tstate.h
@@ -12,7 +12,6 @@ extern "C" {
 #include "pycore_freelist_state.h"      // struct _Py_freelists
 #include "pycore_interpframe_structs.h" // _PyInterpreterFrame
 #include "pycore_mimalloc.h"            // struct _mimalloc_thread_state
-#include "pycore_optimizer_types.h"     // JitOptContext
 #include "pycore_qsbr.h"                // struct qsbr
 #include "pycore_uop.h"                 // struct _PyUOpInstruction
 #include "pycore_structs.h"
@@ -24,46 +23,6 @@ struct _gc_thread_state {
 };
 #endif
 
-#if _Py_TIER2
-typedef struct _PyJitTracerInitialState {
-    int stack_depth;
-    int chain_depth;
-    struct _PyExitData *exit;
-    PyCodeObject *code; // Strong
-    PyFunctionObject *func; // Strong
-    struct _PyExecutorObject *executor; // Strong
-    _Py_CODEUNIT *start_instr;
-    _Py_CODEUNIT *close_loop_instr;
-    _Py_CODEUNIT *jump_backward_instr;
-} _PyJitTracerInitialState;
-
-typedef struct _PyJitTracerPreviousState {
-    bool dependencies_still_valid;
-    int code_max_size;
-    int code_curr_size;
-    int instr_oparg;
-    int instr_stacklevel;
-    _Py_CODEUNIT *instr;
-    PyCodeObject *instr_code; // Strong
-    struct _PyInterpreterFrame *instr_frame;
-    _PyBloomFilter dependencies;
-} _PyJitTracerPreviousState;
-
-typedef struct _PyJitTracerTranslatorState {
-    int jump_backward_seen;
-} _PyJitTracerTranslatorState;
-
-typedef struct _PyJitTracerState {
-    bool is_tracing;
-    _PyJitTracerInitialState initial_state;
-    _PyJitTracerPreviousState prev_state;
-    _PyJitTracerTranslatorState translator_state;
-    JitOptContext opt_context;
-    _PyUOpInstruction code_buffer[UOP_MAX_TRACE_LENGTH];
-    _PyUOpInstruction out_buffer[UOP_MAX_TRACE_LENGTH];
-} _PyJitTracerState;
-
-#endif
 
 // Every PyThreadState is actually allocated as a _PyThreadStateImpl. The
 // PyThreadState fields are exposed as part of the C API, although most fields
@@ -141,7 +100,7 @@ typedef struct _PyThreadStateImpl {
     Py_ssize_t reftotal;  // this thread's total refcount operations
 #endif
 #if _Py_TIER2
-    _PyJitTracerState *jit_tracer_state;
+    struct _PyJitTracerState *jit_tracer_state;
 #endif
 } _PyThreadStateImpl;
 
diff --git a/Include/internal/pycore_uop.h b/Include/internal/pycore_uop.h
index e828a1cc5a5722..f9be01acb57197 100644
--- a/Include/internal/pycore_uop.h
+++ b/Include/internal/pycore_uop.h
@@ -38,11 +38,10 @@ typedef struct _PyUOpInstruction{
 // This is the length of the trace we translate initially.
 #ifdef Py_DEBUG
 // With asserts, the stencils are a lot larger
-#define UOP_MAX_TRACE_LENGTH 2000
+#define UOP_MAX_TRACE_LENGTH 1000
 #else
-#define UOP_MAX_TRACE_LENGTH 5000
+#define UOP_MAX_TRACE_LENGTH 2500
 #endif
-#define UOP_BUFFER_SIZE (UOP_MAX_TRACE_LENGTH * sizeof(_PyUOpInstruction))
 
 /* Bloom filter with m = 256
  * https://en.wikipedia.org/wiki/Bloom_filter */
diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py
index c9c757857a8a5d..3997acbdf84695 100644
--- a/Lib/test/test_capi/test_misc.py
+++ b/Lib/test/test_capi/test_misc.py
@@ -2857,24 +2857,6 @@ def func():
         names = ["func", "outer", "outer", "inner", "inner", "outer", "inner"]
         self.do_test(func, names)
 
-    def test_replaced_interpreter(self):
-        def inner():
-            yield 'abc'
-        def outer():
-            yield from inner()
-        def func():
-            list(outer())
-        _testinternalcapi.set_eval_frame_interp()
-        try:
-            func()
-        finally:
-            _testinternalcapi.set_eval_frame_default()
-
-        stats = _testinternalcapi.get_eval_frame_stats()
-
-        self.assertEqual(stats["resumes"], 5)
-        self.assertEqual(stats["loads"], 5)
-
 
 @unittest.skipUnless(support.Py_GIL_DISABLED, 'need Py_GIL_DISABLED')
 class TestPyThreadId(unittest.TestCase):
diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py
index 307eac6ee51756..7c33320e9f1785 100644
--- a/Lib/test/test_capi/test_opt.py
+++ b/Lib/test/test_capi/test_opt.py
@@ -3551,6 +3551,46 @@ def test_is_none(n):
         self.assertIn("_POP_TOP_NOP", uops)
         self.assertLessEqual(count_ops(ex, "_POP_TOP"), 2)
 
+    def test_is_true_narrows_to_constant(self):
+        def f(n):
+            def return_true():
+                return True
+
+            hits = 0
+            v = return_true()
+            for i in range(n):
+                if v is True:
+                    hits += v + 1
+            return hits
+
+        res, ex = self._run_with_optimizer(f, TIER2_THRESHOLD)
+        self.assertEqual(res, TIER2_THRESHOLD * 2)
+        self.assertIsNotNone(ex)
+        uops = get_opnames(ex)
+
+        # v + 1 should be constant folded
+        self.assertNotIn("_BINARY_OP", uops)
+
+    def test_is_false_narrows_to_constant(self):
+        def f(n):
+            def return_false():
+                return False
+
+            hits = 0
+            v = return_false()
+            for i in range(n):
+                if v is False:
+                    hits += v + 1
+            return hits
+
+        res, ex = self._run_with_optimizer(f, TIER2_THRESHOLD)
+        self.assertEqual(res, TIER2_THRESHOLD)
+        self.assertIsNotNone(ex)
+        uops = get_opnames(ex)
+
+        # v + 1 should be constant folded
+        self.assertNotIn("_BINARY_OP", uops)
+
     def test_for_iter_gen_frame(self):
         def f(n):
             for i in range(n):
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index 1d8e908efb0572..a5708b298c84a5 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -1,6 +1,5 @@
 import builtins
 import codecs
-import _datetime
 import gc
 import io
 import locale
@@ -494,7 +493,7 @@ def test_getframemodulename(self):
         self.assertIs(f, f2)
         self.assertIsNone(sys._getframemodulename(i))
 
-    # sys._current_frames() is a CPython-only gimmick.
+    @support.cpython_only  # sys._current_frames() is a CPython-only gimmick.
     @threading_helper.reap_threads
     @threading_helper.requires_working_threading()
     def test_current_frames(self):
@@ -1742,7 +1741,12 @@ def delx(self): del self.__x
         x = property(getx, setx, delx, "")
         check(x, size('5Pi'))
         # PyCapsule
-        check(_datetime.datetime_CAPI, size('6P'))
+        try:
+            import _datetime
+        except ModuleNotFoundError:
+            pass
+        else:
+            check(_datetime.datetime_CAPI, size('6P'))
         # rangeiterator
         check(iter(range(1)), size('3l'))
         check(iter(range(2**65)), size('3P'))
diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h
index 3b4b3253b3638c..d791ba0e8eca97 100644
--- a/Python/ceval_macros.h
+++ b/Python/ceval_macros.h
@@ -433,7 +433,7 @@ do { \
         JUMP_TO_LABEL(error); \
     } \
     if (keep_tracing_bit) { \
-        assert(((_PyThreadStateImpl *)tstate)->jit_tracer_state->prev_state.code_curr_size == 2); \
+        assert(uop_buffer_length(&((_PyThreadStateImpl *)tstate)->jit_tracer_state->code_buffer)); \
        ENTER_TRACING(); \
        DISPATCH_NON_TRACING(); \
    } \
diff --git a/Python/optimizer.c b/Python/optimizer.c
index 15a1eb5a17745b..f25242972efeb1 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -188,9 +188,6 @@ _PyOptimizer_Optimize(
         }
         insert_executor(code, start, index, executor);
     }
-    else {
-        executor->vm_data.code = NULL;
-    }
     executor->vm_data.chain_depth = chain_depth;
     assert(executor->vm_data.valid);
     _PyExitData *exit = _tstate->jit_tracer_state->initial_state.exit;
@@ -547,52 +544,43 @@ guard_ip_uop[MAX_UOP_ID + 1] = {
 
 #endif
 
-static inline int
+static inline void
 add_to_trace(
-    _PyUOpInstruction *trace,
-    int trace_length,
+    _PyJitUopBuffer *trace,
     uint16_t opcode,
     uint16_t oparg,
     uint64_t operand,
     uint32_t target)
 {
-    trace[trace_length].opcode = opcode;
-    trace[trace_length].format = UOP_FORMAT_TARGET;
-    trace[trace_length].target = target;
-    trace[trace_length].oparg = oparg;
-    trace[trace_length].operand0 = operand;
+    _PyUOpInstruction *inst = trace->next;
+    inst->opcode = opcode;
+    inst->format = UOP_FORMAT_TARGET;
+    inst->target = target;
+    inst->oparg = oparg;
+    inst->operand0 = operand;
 #ifdef Py_STATS
-    trace[trace_length].execution_count = 0;
+    inst->execution_count = 0;
 #endif
-    return trace_length + 1;
+    trace->next++;
 }
 
+
 #ifdef Py_DEBUG
 #define ADD_TO_TRACE(OPCODE, OPARG, OPERAND, TARGET) \
-    assert(trace_length < max_length); \
-    trace_length = add_to_trace(trace, trace_length, (OPCODE), (OPARG), (OPERAND), (TARGET)); \
+    add_to_trace(trace, (OPCODE), (OPARG), (OPERAND), (TARGET)); \
     if (lltrace >= 2) { \
-        printf("%4d ADD_TO_TRACE: ", trace_length); \
-        _PyUOpPrint(&trace[trace_length-1]); \
+        printf("%4d ADD_TO_TRACE: ", uop_buffer_length(trace)); \
+        _PyUOpPrint(uop_buffer_last(trace)); \
         printf("\n"); \
     }
 #else
 #define ADD_TO_TRACE(OPCODE, OPARG, OPERAND, TARGET) \
-    assert(trace_length < max_length); \
-    trace_length = add_to_trace(trace, trace_length, (OPCODE), (OPARG), (OPERAND), (TARGET));
+    add_to_trace(trace, (OPCODE), (OPARG), (OPERAND), (TARGET))
 #endif
 
 #define INSTR_IP(INSTR, CODE) \
     ((uint32_t)((INSTR) - ((_Py_CODEUNIT *)(CODE)->co_code_adaptive)))
 
-// Reserve space for n uops
-#define RESERVE_RAW(n, opname) \
-    if (trace_length + (n) > max_length) { \
-        DPRINTF(2, "No room for %s (need %d, got %d)\n", \
-                (opname), (n), max_length - trace_length); \
-        OPT_STAT_INC(trace_too_long); \
-        goto full; \
-    }
 
 static int
 is_terminator(const _PyUOpInstruction *uop)
@@ -629,9 +617,7 @@ _PyJit_translate_single_bytecode_to_trace(
     PyCodeObject *old_code = tracer->prev_state.instr_code;
     bool progress_needed = (tracer->initial_state.chain_depth % MAX_CHAIN_DEPTH) == 0;
     _PyBloomFilter *dependencies = &tracer->prev_state.dependencies;
-    int trace_length = tracer->prev_state.code_curr_size;
-    _PyUOpInstruction *trace = tracer->code_buffer;
-    int max_length = tracer->prev_state.code_max_size;
+    _PyJitUopBuffer *trace = &tracer->code_buffer;
 
     _Py_CODEUNIT *this_instr = tracer->prev_state.instr;
     _Py_CODEUNIT *target_instr = this_instr;
@@ -670,15 +656,13 @@ _PyJit_translate_single_bytecode_to_trace(
         }
     }
 
-    int old_stack_level = tracer->prev_state.instr_stacklevel;
-
     // Strange control-flow
     bool has_dynamic_jump_taken = OPCODE_HAS_UNPREDICTABLE_JUMP(opcode) &&
         (next_instr != this_instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]]);
 
     /* Special case the first instruction,
      * so that we can guarantee forward progress */
-    if (progress_needed && tracer->prev_state.code_curr_size < CODE_SIZE_NO_PROGRESS) {
+    if (progress_needed && uop_buffer_length(&tracer->code_buffer) < CODE_SIZE_NO_PROGRESS) {
         if (OPCODE_HAS_EXIT(opcode) || OPCODE_HAS_DEOPT(opcode)) {
             opcode = _PyOpcode_Deopt[opcode];
         }
@@ -694,7 +678,7 @@ _PyJit_translate_single_bytecode_to_trace(
     int is_sys_tracing = (tstate->c_tracefunc != NULL) || (tstate->c_profilefunc != NULL);
 
     if (is_sys_tracing) {
-        goto full;
+        goto done;
     }
 
     if (stop_tracing_opcode == _DEOPT) {
@@ -710,7 +694,7 @@ _PyJit_translate_single_bytecode_to_trace(
         goto done;
     }
 
-    DPRINTF(2, "%p %d: %s(%d) %d %d\n", old_code, target, _PyOpcode_OpName[opcode], oparg, needs_guard_ip, old_stack_level);
+    DPRINTF(2, "%p %d: %s(%d) %d\n", old_code, target, _PyOpcode_OpName[opcode], oparg, needs_guard_ip);
 
 #ifdef Py_DEBUG
     if (oparg > 255) {
@@ -719,7 +703,7 @@ _PyJit_translate_single_bytecode_to_trace(
 #endif
 
     if (!tracer->prev_state.dependencies_still_valid) {
-        goto full;
+        goto done;
     }
 
     // This happens when a recursive call happens that we can't trace. Such as Python -> C -> Python calls
@@ -734,16 +718,14 @@ _PyJit_translate_single_bytecode_to_trace(
 unsupported:
         {
             // Rewind to previous instruction and replace with _EXIT_TRACE.
-            _PyUOpInstruction *curr = &trace[trace_length-1];
-            while (curr->opcode != _SET_IP && trace_length > 2) {
-                trace_length--;
-                curr = &trace[trace_length-1];
+            _PyUOpInstruction *curr = uop_buffer_last(trace);
+            while (curr->opcode != _SET_IP && uop_buffer_length(trace) > 2) {
+                trace->next--;
+                curr = uop_buffer_last(trace);
             }
-            assert(curr->opcode == _SET_IP || trace_length == 2);
+            assert(curr->opcode == _SET_IP || uop_buffer_length(trace) == 2);
             if (curr->opcode == _SET_IP) {
                 int32_t old_target = (int32_t)uop_get_target(curr);
-                curr++;
-                trace_length++;
                 curr->opcode = _DEOPT;
                 curr->format = UOP_FORMAT_TARGET;
                 curr->target = old_target;
@@ -752,7 +734,6 @@ _PyJit_translate_single_bytecode_to_trace(
         }
     }
 
-
     if (opcode == NOP) {
         return 1;
     }
@@ -766,7 +747,7 @@ _PyJit_translate_single_bytecode_to_trace(
     }
 
     // One for possible _DEOPT, one because _CHECK_VALIDITY itself might _DEOPT
-    max_length -= 2;
+    trace->end -= 2;
 
     const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode];
@@ -775,18 +756,28 @@ _PyJit_translate_single_bytecode_to_trace(
 
     if (OPCODE_HAS_EXIT(opcode)) {
-        // Make space for side exit and final _EXIT_TRACE:
-        max_length--;
+        // Make space for side exit
+        trace->end--;
     }
     if (OPCODE_HAS_ERROR(opcode)) {
-        // Make space for error stub and final _EXIT_TRACE:
-        max_length--;
+        // Make space for error stub
+        trace->end--;
+    }
+    if (OPCODE_HAS_DEOPT(opcode)) {
+        // Make space for side exit
+        trace->end--;
     }
 
     // _GUARD_IP leads to an exit.
-    max_length -= needs_guard_ip;
+    trace->end -= needs_guard_ip;
 
-    RESERVE_RAW(expansion->nuops + needs_guard_ip + 2 + (!OPCODE_HAS_NO_SAVE_IP(opcode)), "uop and various checks");
+    int space_needed = expansion->nuops + needs_guard_ip + 2 + (!OPCODE_HAS_NO_SAVE_IP(opcode));
+    if (uop_buffer_remaining_space(trace) < space_needed) {
+        DPRINTF(2, "No room for expansions and guards (need %d, got %d)\n",
+                space_needed, uop_buffer_remaining_space(trace));
+        OPT_STAT_INC(trace_too_long);
+        goto done;
+    }
 
     ADD_TO_TRACE(_CHECK_VALIDITY, 0, 0, target);
@@ -825,7 +816,7 @@ _PyJit_translate_single_bytecode_to_trace(
         {
             if ((next_instr != tracer->initial_state.close_loop_instr) &&
                 (next_instr != tracer->initial_state.start_instr) &&
-                tracer->prev_state.code_curr_size > CODE_SIZE_NO_PROGRESS &&
+                uop_buffer_length(&tracer->code_buffer) > CODE_SIZE_NO_PROGRESS &&
                 // For side exits, we don't want to terminate them early.
                 tracer->initial_state.exit == NULL &&
                 // These are coroutines, and we want to unroll those usually.
@@ -836,7 +827,7 @@ _PyJit_translate_single_bytecode_to_trace(
                 // inner loop might start and let the traces rejoin.
                 OPT_STAT_INC(inner_loop);
                 ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
-                trace[trace_length-1].operand1 = true; // is_control_flow
+                uop_buffer_last(trace)->operand1 = true; // is_control_flow
                 DPRINTF(2, "JUMP_BACKWARD not to top ends trace %p %p %p\n", next_instr, tracer->initial_state.close_loop_instr, tracer->initial_state.start_instr);
                 goto done;
@@ -913,19 +904,19 @@ _PyJit_translate_single_bytecode_to_trace(
                     }
                     break;
                 case OPERAND1_1:
-                    assert(trace[trace_length-1].opcode == uop);
+                    assert(uop_buffer_last(trace)->opcode == uop);
                     operand = read_u16(&this_instr[offset].cache);
-                    trace[trace_length-1].operand1 = operand;
+                    uop_buffer_last(trace)->operand1 = operand;
                     continue;
                 case OPERAND1_2:
-                    assert(trace[trace_length-1].opcode == uop);
+                    assert(uop_buffer_last(trace)->opcode == uop);
                     operand = read_u32(&this_instr[offset].cache);
-                    trace[trace_length-1].operand1 = operand;
+                    uop_buffer_last(trace)->operand1 = operand;
                     continue;
                 case OPERAND1_4:
-                    assert(trace[trace_length-1].opcode == uop);
+                    assert(uop_buffer_last(trace)->opcode == uop);
                     operand = read_u64(&this_instr[offset].cache);
-                    trace[trace_length-1].operand1 = operand;
+                    uop_buffer_last(trace)->operand1 = operand;
                     continue;
                 default:
                     fprintf(stderr,
@@ -955,7 +946,7 @@ _PyJit_translate_single_bytecode_to_trace(
                 }
             }
             ADD_TO_TRACE(uop, oparg, operand, target);
-            trace[trace_length - 1].operand1 = PyStackRef_IsNone(frame->f_executable) ? 2 : ((int)(frame->stackpointer - _PyFrame_Stackbase(frame)));
+            uop_buffer_last(trace)->operand1 = PyStackRef_IsNone(frame->f_executable) ? 2 : ((int)(frame->stackpointer - _PyFrame_Stackbase(frame)));
             break;
         }
         if (uop == _BINARY_OP_INPLACE_ADD_UNICODE) {
@@ -973,9 +964,9 @@ _PyJit_translate_single_bytecode_to_trace(
     }  // End switch (opcode)
 
     if (needs_guard_ip) {
-        uint16_t guard_ip = guard_ip_uop[trace[trace_length-1].opcode];
+        uint16_t guard_ip = guard_ip_uop[uop_buffer_last(trace)->opcode];
         if (guard_ip == 0) {
-            DPRINTF(1, "Unknown uop needing guard ip %s\n", _PyOpcode_uop_name[trace[trace_length-1].opcode]);
+            DPRINTF(1, "Unknown uop needing guard ip %s\n", _PyOpcode_uop_name[uop_buffer_last(trace)->opcode]);
             Py_UNREACHABLE();
         }
         ADD_TO_TRACE(guard_ip, 0, (uintptr_t)next_instr, 0);
@@ -983,7 +974,7 @@ _PyJit_translate_single_bytecode_to_trace(
 
     // Loop back to the start
     int is_first_instr = tracer->initial_state.close_loop_instr == next_instr ||
        tracer->initial_state.start_instr == next_instr;
-    if (is_first_instr && tracer->prev_state.code_curr_size > CODE_SIZE_NO_PROGRESS) {
+    if (is_first_instr && uop_buffer_length(trace) > CODE_SIZE_NO_PROGRESS) {
         if (needs_guard_ip) {
             ADD_TO_TRACE(_SET_IP, 0, (uintptr_t)next_instr, 0);
         }
@@ -991,27 +982,13 @@ _PyJit_translate_single_bytecode_to_trace(
         goto done;
     }
     DPRINTF(2, "Trace continuing\n");
-    tracer->prev_state.code_curr_size = trace_length;
-    tracer->prev_state.code_max_size = max_length;
     return 1;
 
 done:
     DPRINTF(2, "Trace done\n");
-    tracer->prev_state.code_curr_size = trace_length;
-    tracer->prev_state.code_max_size = max_length;
-    return 0;
-full:
-    DPRINTF(2, "Trace full\n");
-    if (!is_terminator(&tracer->code_buffer[trace_length-1])) {
-        // Undo the last few instructions.
-        trace_length = tracer->prev_state.code_curr_size;
-        max_length = tracer->prev_state.code_max_size;
-        // We previously reversed one.
-        max_length += 1;
+    if (!is_terminator(uop_buffer_last(trace))) {
         ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
-        trace[trace_length-1].operand1 = true; // is_control_flow
+        uop_buffer_last(trace)->operand1 = true; // is_control_flow
     }
-    tracer->prev_state.code_curr_size = trace_length;
-    tracer->prev_state.code_max_size = max_length;
     return 0;
 }
@@ -1059,11 +1036,12 @@ _PyJit_TryInitializeTracing(
             2 * INSTR_IP(close_loop_instr, code),
             chain_depth);
 #endif
-    add_to_trace(tracer->code_buffer, 0, _START_EXECUTOR, 0, (uintptr_t)start_instr, INSTR_IP(start_instr, code));
-    add_to_trace(tracer->code_buffer, 1, _MAKE_WARM, 0, 0, 0);
-    tracer->prev_state.code_curr_size = CODE_SIZE_EMPTY;
+    /* Set up tracing buffer */
+    _PyJitUopBuffer *trace = &tracer->code_buffer;
+    uop_buffer_init(trace, &tracer->uop_array[0], UOP_MAX_TRACE_LENGTH);
+    ADD_TO_TRACE(_START_EXECUTOR, 0, (uintptr_t)start_instr, INSTR_IP(start_instr, code));
+    ADD_TO_TRACE(_MAKE_WARM, 0, 0, 0);
 
-    tracer->prev_state.code_max_size = UOP_MAX_TRACE_LENGTH/2;
     tracer->initial_state.start_instr = start_instr;
     tracer->initial_state.close_loop_instr = close_loop_instr;
     tracer->initial_state.code = (PyCodeObject *)Py_NewRef(code);
@@ -1122,8 +1100,7 @@ _PyJit_FinalizeTracing(PyThreadState *tstate, int err)
     Py_CLEAR(tracer->initial_state.func);
     Py_CLEAR(tracer->initial_state.executor);
     Py_CLEAR(tracer->prev_state.instr_code);
-    tracer->prev_state.code_curr_size = CODE_SIZE_EMPTY;
-    tracer->prev_state.code_max_size = UOP_MAX_TRACE_LENGTH/2 - 1;
+    uop_buffer_init(&tracer->code_buffer, &tracer->uop_array[0], UOP_MAX_TRACE_LENGTH);
     tracer->is_tracing = false;
 }
@@ -1137,7 +1114,6 @@ _PyJit_TracerFree(_PyThreadStateImpl *_tstate)
 }
 
 #undef RESERVE
-#undef RESERVE_RAW
 #undef INSTR_IP
 #undef ADD_TO_TRACE
 #undef DPRINTF
@@ -1467,39 +1443,47 @@ int effective_trace_length(_PyUOpInstruction *buffer, int length)
 
 static int
-stack_allocate(_PyUOpInstruction *buffer, int length)
+stack_allocate(_PyUOpInstruction *buffer, _PyUOpInstruction *output, int length)
 {
     assert(buffer[0].opcode == _START_EXECUTOR);
-    for (int i = length-1; i >= 0; i--) {
-        buffer[i*2+1] = buffer[i];
-        buffer[i*2].format = UOP_FORMAT_TARGET;
-        buffer[i*2].oparg = 0;
-        buffer[i*2].target = 0;
+    /* The input and output buffers may overlap.
+       Make sure that we can move instructions to the output
+       without overwriting the input. */
+    if (buffer == output) {
+        // This can only happen if the optimizer has not been run
+        for (int i = 0; i < length; i++) {
+            buffer[i + UOP_MAX_TRACE_LENGTH] = buffer[i];
+        }
+        buffer += UOP_MAX_TRACE_LENGTH;
+    }
+    else {
+        assert(output + UOP_MAX_TRACE_LENGTH == buffer);
     }
     int depth = 0;
+    _PyUOpInstruction *write = output;
     for (int i = 0; i < length; i++) {
-        _PyUOpInstruction *spill_or_reload = &buffer[i*2];
-        int uop = buffer[i*2+1].opcode;
+        int uop = buffer[i].opcode;
         if (uop == _NOP) {
-            // leave _NOPs to be cleaned up later
-            spill_or_reload->opcode = _NOP;
             continue;
         }
         int new_depth = _PyUop_Caching[uop].best[depth];
-        if (new_depth == depth) {
-            spill_or_reload->opcode = _NOP;
-        }
-        else {
-            spill_or_reload->opcode = _PyUop_SpillsAndReloads[depth][new_depth];
+        if (new_depth != depth) {
+            write->opcode = _PyUop_SpillsAndReloads[depth][new_depth];
+            assert(write->opcode != 0);
+            write->format = UOP_FORMAT_TARGET;
+            write->oparg = 0;
+            write->target = 0;
+            write++;
             depth = new_depth;
         }
+        *write = buffer[i];
         uint16_t new_opcode = _PyUop_Caching[uop].entries[depth].opcode;
         assert(new_opcode != 0);
-        assert(spill_or_reload->opcode != 0);
-        buffer[i*2+1].opcode = new_opcode;
+        write->opcode = new_opcode;
+        write++;
         depth = _PyUop_Caching[uop].entries[depth].output;
     }
-    return length*2;
+    return write - output;
 }
 
 static int
@@ -1512,28 +1496,28 @@ uop_optimize(
     _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
     assert(_tstate->jit_tracer_state != NULL);
     _PyBloomFilter *dependencies = &_tstate->jit_tracer_state->prev_state.dependencies;
-    _PyUOpInstruction *buffer = _tstate->jit_tracer_state->code_buffer;
+    _PyUOpInstruction *buffer = _tstate->jit_tracer_state->code_buffer.start;
     OPT_STAT_INC(attempts);
     bool is_noopt = !tstate->interp->opt_config.uops_optimize_enabled;
     int curr_stackentries = _tstate->jit_tracer_state->initial_state.stack_depth;
-    int length = _tstate->jit_tracer_state->prev_state.code_curr_size;
+    int length = uop_buffer_length(&_tstate->jit_tracer_state->code_buffer);
     if (length <= CODE_SIZE_NO_PROGRESS) {
         return 0;
     }
     assert(length > 0);
-    assert(length < UOP_MAX_TRACE_LENGTH/2);
+    assert(length < UOP_MAX_TRACE_LENGTH);
     OPT_STAT_INC(traces_created);
     if (!is_noopt) {
+        _PyUOpInstruction *output = &_tstate->jit_tracer_state->uop_array[UOP_MAX_TRACE_LENGTH];
         length = _Py_uop_analyze_and_optimize(
-            _tstate,
-            buffer, length,
-            curr_stackentries, dependencies);
+            _tstate, buffer, length, curr_stackentries,
+            output, dependencies);
         if (length <= 0) {
             return length;
         }
-        buffer = _tstate->jit_tracer_state->out_buffer;
+        buffer = output;
     }
-    assert(length < UOP_MAX_TRACE_LENGTH/2);
+    assert(length < UOP_MAX_TRACE_LENGTH);
     assert(length >= 1);
     /* Fix up */
     for (int pc = 0; pc < length; pc++) {
@@ -1549,7 +1533,9 @@ uop_optimize(
         assert(_PyOpcode_uop_name[buffer[pc].opcode]);
     }
     OPT_HIST(effective_trace_length(buffer, length), optimized_trace_length_hist);
-    length = stack_allocate(buffer, length);
+    _PyUOpInstruction *output = &_tstate->jit_tracer_state->uop_array[0];
+    length = stack_allocate(buffer, output, length);
+    buffer = output;
     length = prepare_for_execution(buffer, length);
     assert(length <= UOP_MAX_TRACE_LENGTH);
     _PyExecutorObject *executor = make_executor_from_uops(
@@ -1707,6 +1693,7 @@ _Py_ExecutorInit(_PyExecutorObject *executor, const _PyBloomFilter *dependency_set)
 {
     executor->vm_data.valid = true;
     executor->vm_data.pending_deletion = 0;
+    executor->vm_data.code = NULL;
     for (int i = 0; i < _Py_BLOOM_FILTER_WORDS; i++) {
         executor->vm_data.bloom.bits[i] = dependency_set->bits[i];
     }
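
[Editorial aside, not part of the patch: `stack_allocate` now writes into a separate output region instead of doubling the trace in place; when input and output share `uop_array`, the input is first shifted into the upper half so the writer can never overtake the reader. A simplified model of that overlap rule — ints instead of uop instructions, `N` standing in for `UOP_MAX_TRACE_LENGTH`:]

/* Simplified, assumption-laden model of the overlap handling above. */
#include <assert.h>

#define N 8  /* stand-in for UOP_MAX_TRACE_LENGTH */

static int
compact(int *buffer, int *output, int length)
{
    assert(length <= N);
    if (buffer == output) {
        /* Shift the input into the upper half first ... */
        for (int i = length - 1; i >= 0; i--) {
            buffer[i + N] = buffer[i];
        }
        buffer += N;
    }
    /* ... so this forward pass always reads at or ahead of the writer. */
    int *write = output;
    for (int i = 0; i < length; i++) {
        *write++ = buffer[i];  /* the real code may also emit a spill/reload here */
    }
    return (int)(write - output);
}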
diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c
index 01fd24564f8c2b..c6a1ae60a317fa 100644
--- a/Python/optimizer_analysis.c
+++ b/Python/optimizer_analysis.c
@@ -203,14 +203,14 @@ static inline void
 add_op(JitOptContext *ctx, _PyUOpInstruction *this_instr,
        uint16_t opcode, uint16_t oparg, uintptr_t operand0)
 {
-    _PyUOpInstruction *out = &ctx->out_buffer[ctx->out_len];
+    _PyUOpInstruction *out = ctx->out_buffer.next;
     out->opcode = (opcode);
     out->format = this_instr->format;
     out->oparg = (oparg);
     out->target = this_instr->target;
     out->operand0 = (operand0);
     out->operand1 = this_instr->operand1;
-    ctx->out_len++;
+    ctx->out_buffer.next++;
 }
 
 /* Shortened forms for convenience, used in optimizer_bytecodes.c */
@@ -247,6 +247,8 @@ add_op(JitOptContext *ctx, _PyUOpInstruction *this_instr,
 #define sym_is_compact_int _Py_uop_sym_is_compact_int
 #define sym_new_compact_int _Py_uop_sym_new_compact_int
 #define sym_new_truthiness _Py_uop_sym_new_truthiness
+#define sym_new_predicate _Py_uop_sym_new_predicate
+#define sym_apply_predicate_narrowing _Py_uop_sym_apply_predicate_narrowing
 
 #define JUMP_TO_LABEL(label) goto label;
 
@@ -428,6 +430,7 @@ optimize_uops(
     _PyUOpInstruction *trace,
     int trace_len,
     int curr_stacklen,
+    _PyUOpInstruction *output,
     _PyBloomFilter *dependencies
 )
 {
@@ -438,7 +441,7 @@ optimize_uops(
     JitOptContext *ctx = &tstate->jit_tracer_state->opt_context;
     uint32_t opcode = UINT16_MAX;
 
-    ctx->out_buffer = tstate->jit_tracer_state->out_buffer;
+    uop_buffer_init(&ctx->out_buffer, output, UOP_MAX_TRACE_LENGTH);
 
     // Make sure that watchers are set up
     PyInterpreterState *interp = _PyInterpreterState_GET();
@@ -456,14 +459,20 @@ optimize_uops(
     ctx->curr_frame_depth++;
     ctx->frame = frame;
 
-    ctx->out_len = 0;
-
     _PyUOpInstruction *this_instr = NULL;
     JitOptRef *stack_pointer = ctx->frame->stack_pointer;
 
-    for (int i = 0; !ctx->done; i++) {
-        assert(i < trace_len);
+    for (int i = 0; i < trace_len; i++) {
         this_instr = &trace[i];
+        if (ctx->done) {
+            // Don't do any more optimization, but
+            // we still need to reach a terminator for correctness.
+            *(ctx->out_buffer.next++) = *this_instr;
+            if (is_terminator_uop(this_instr)) {
+                break;
+            }
+            continue;
+        }
 
         int oparg = this_instr->oparg;
         opcode = this_instr->opcode;
@@ -483,6 +492,8 @@ optimize_uops(
         }
 #endif
 
+        _PyUOpInstruction *out_ptr = ctx->out_buffer.next;
+
         switch (opcode) {
 
 #include "optimizer_cases.c.h"
@@ -492,8 +503,8 @@ optimize_uops(
             Py_UNREACHABLE();
         }
         // If no ADD_OP was called during this iteration, copy the original instruction
-        if (ctx->out_len == i) {
-            ctx->out_buffer[ctx->out_len++] = *this_instr;
+        if (ctx->out_buffer.next == out_ptr) {
+            *(ctx->out_buffer.next++) = *this_instr;
         }
         assert(ctx->frame != NULL);
         if (!CURRENT_FRAME_IS_INIT_SHIM()) {
@@ -524,20 +535,11 @@ optimize_uops(
      * would be no benefit in retrying later */
     _Py_uop_abstractcontext_fini(ctx);
     // Check that the trace ends with a proper terminator
-    if (ctx->out_len > 0) {
-        _PyUOpInstruction *last_uop = &ctx->out_buffer[ctx->out_len - 1];
-        if (!is_terminator_uop(last_uop)) {
-            // Copy remaining uops from original trace until we find a terminator
-            for (int i = ctx->out_len; i < trace_len; i++) {
-                ctx->out_buffer[ctx->out_len++] = trace[i];
-                if (is_terminator_uop(&trace[i])) {
-                    break;
-                }
-            }
-        }
+    if (uop_buffer_length(&ctx->out_buffer) > 0) {
+        assert(is_terminator_uop(uop_buffer_last(&ctx->out_buffer)));
     }
 
-    return ctx->out_len;
+    return uop_buffer_length(&ctx->out_buffer);
 
 error:
     DPRINTF(3, "\n");
@@ -694,14 +696,15 @@ _Py_uop_analyze_and_optimize(
     _PyUOpInstruction *buffer,
     int length,
     int curr_stacklen,
+    _PyUOpInstruction *output,
     _PyBloomFilter *dependencies
 )
 {
     OPT_STAT_INC(optimizer_attempts);
 
     length = optimize_uops(
-        tstate, buffer,
-        length, curr_stacklen, dependencies);
+        tstate, buffer, length, curr_stacklen,
+        output, dependencies);
 
     if (length == 0) {
         return length;
@@ -709,7 +712,7 @@ _Py_uop_analyze_and_optimize(
 
     assert(length > 0);
 
-    length = remove_unneeded_uops(tstate->jit_tracer_state->out_buffer, length);
+    length = remove_unneeded_uops(output, length);
     assert(length > 0);
 
     OPT_STAT_INC(optimizer_successes);
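
[Editorial aside, not part of the patch: once the abstract interpreter sets `ctx->done`, the loop above stops analyzing and copies instructions through verbatim until a terminator, so the emitted trace always ends well-formed. A minimal model of that control flow — the types and `is_term`/`run` names are hypothetical:]

/* Minimal model of the copy-through-on-done loop above. */
#include <stdbool.h>

typedef struct { int opcode; } Uop;

static bool is_term(const Uop *u) { return u->opcode < 0; /* stand-in test */ }

static int
run(const Uop *trace, int trace_len, Uop *out, bool *done)
{
    int out_len = 0;
    for (int i = 0; i < trace_len; i++) {
        if (*done) {
            out[out_len++] = trace[i];  /* verbatim copy, no analysis */
            if (is_term(&trace[i])) {
                break;                  /* trace now ends well-formed */
            }
            continue;
        }
        /* ... abstract interpretation would go here, possibly setting *done ... */
        out[out_len++] = trace[i];
    }
    return out_len;
}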
diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c
index 366094d939a396..1584e731d1b2d4 100644
--- a/Python/optimizer_bytecodes.c
+++ b/Python/optimizer_bytecodes.c
@@ -38,6 +38,8 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame;
 #define sym_new_compact_int _Py_uop_sym_new_compact_int
 #define sym_is_compact_int _Py_uop_sym_is_compact_int
 #define sym_new_truthiness _Py_uop_sym_new_truthiness
+#define sym_new_predicate _Py_uop_sym_new_predicate
+#define sym_apply_predicate_narrowing _Py_uop_sym_apply_predicate_narrowing
 
 extern int
 optimize_to_bool(
@@ -192,7 +194,6 @@ dummy_func(void) {
                 _Py_BloomFilter_Add(dependencies, type);
             }
         }
-
     }
 
@@ -533,7 +534,7 @@ dummy_func(void) {
     }
 
     op(_IS_OP, (left, right -- b, l, r)) {
-        b = sym_new_type(ctx, &PyBool_Type);
+        b = sym_new_predicate(ctx, left, right, (oparg ? JIT_PRED_IS_NOT : JIT_PRED_IS));
         l = left;
         r = right;
     }
@@ -796,7 +797,7 @@ dummy_func(void) {
         if (sym_is_const(ctx, callable) && sym_matches_type(callable, &PyFunction_Type)) {
             assert(PyFunction_Check(sym_get_const(ctx, callable)));
             ADD_OP(_CHECK_FUNCTION_VERSION_INLINE, 0, func_version);
-            ctx->out_buffer[ctx->out_len - 1].operand1 = (uintptr_t)sym_get_const(ctx, callable);
+            uop_buffer_last(&ctx->out_buffer)->operand1 = (uintptr_t)sym_get_const(ctx, callable);
         }
         sym_set_type(callable, &PyFunction_Type);
     }
@@ -806,7 +807,7 @@ dummy_func(void) {
             PyMethodObject *method = (PyMethodObject *)sym_get_const(ctx, callable);
             assert(PyMethod_Check(method));
             ADD_OP(_CHECK_FUNCTION_VERSION_INLINE, 0, func_version);
-            ctx->out_buffer[ctx->out_len - 1].operand1 = (uintptr_t)method->im_func;
+            uop_buffer_last(&ctx->out_buffer)->operand1 = (uintptr_t)method->im_func;
         }
         sym_set_type(callable, &PyMethod_Type);
     }
@@ -1173,6 +1174,8 @@ dummy_func(void) {
     }
 
     op(_GUARD_IS_TRUE_POP, (flag -- )) {
+        sym_apply_predicate_narrowing(ctx, flag, true);
+
         if (sym_is_const(ctx, flag)) {
             PyObject *value = sym_get_const(ctx, flag);
             assert(value != NULL);
@@ -1191,6 +1194,8 @@ dummy_func(void) {
     }
 
     op(_GUARD_IS_FALSE_POP, (flag -- )) {
+        sym_apply_predicate_narrowing(ctx, flag, false);
+
         if (sym_is_const(ctx, flag)) {
             PyObject *value = sym_get_const(ctx, flag);
             assert(value != NULL);
@@ -1564,7 +1569,7 @@ dummy_func(void) {
             ctx->frame->globals_watched = true;
         }
         if (ctx->frame->globals_checked_version != version && this_instr[-1].opcode == _NOP) {
-            REPLACE_OP(&ctx->out_buffer[ctx->out_len - 1], _GUARD_GLOBALS_VERSION, 0, version);
+            REPLACE_OP(uop_buffer_last(&ctx->out_buffer), _GUARD_GLOBALS_VERSION, 0, version);
             ctx->frame->globals_checked_version = version;
         }
         if (ctx->frame->globals_checked_version == version) {
diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h
index 2cc53937925a24..341805d51e24cd 100644
--- a/Python/optimizer_cases.c.h
+++ b/Python/optimizer_cases.c.h
@@ -1557,7 +1557,7 @@
                 ctx->frame->globals_watched = true;
             }
             if (ctx->frame->globals_checked_version != version && this_instr[-1].opcode == _NOP) {
-                REPLACE_OP(&ctx->out_buffer[ctx->out_len - 1], _GUARD_GLOBALS_VERSION, 0, version);
+                REPLACE_OP(uop_buffer_last(&ctx->out_buffer), _GUARD_GLOBALS_VERSION, 0, version);
                 ctx->frame->globals_checked_version = version;
             }
             if (ctx->frame->globals_checked_version == version) {
@@ -2293,7 +2293,7 @@
             JitOptRef r;
             right = stack_pointer[-1];
             left = stack_pointer[-2];
-            b = sym_new_type(ctx, &PyBool_Type);
+            b = sym_new_predicate(ctx, left, right, (oparg ? JIT_PRED_IS_NOT : JIT_PRED_IS));
             l = left;
             r = right;
             CHECK_STACK_BOUNDS(1);
@@ -2861,7 +2861,7 @@
             if (sym_is_const(ctx, callable) && sym_matches_type(callable, &PyFunction_Type)) {
                 assert(PyFunction_Check(sym_get_const(ctx, callable)));
                 ADD_OP(_CHECK_FUNCTION_VERSION_INLINE, 0, func_version);
-                ctx->out_buffer[ctx->out_len - 1].operand1 = (uintptr_t)sym_get_const(ctx, callable);
+                uop_buffer_last(&ctx->out_buffer)->operand1 = (uintptr_t)sym_get_const(ctx, callable);
             }
             sym_set_type(callable, &PyFunction_Type);
             break;
@@ -2879,7 +2879,7 @@
                 PyMethodObject *method = (PyMethodObject *)sym_get_const(ctx, callable);
                 assert(PyMethod_Check(method));
                 ADD_OP(_CHECK_FUNCTION_VERSION_INLINE, 0, func_version);
-                ctx->out_buffer[ctx->out_len - 1].operand1 = (uintptr_t)method->im_func;
+                uop_buffer_last(&ctx->out_buffer)->operand1 = (uintptr_t)method->im_func;
             }
             sym_set_type(callable, &PyMethod_Type);
             break;
@@ -3715,6 +3715,7 @@
         case _GUARD_IS_TRUE_POP: {
             JitOptRef flag;
             flag = stack_pointer[-1];
+            sym_apply_predicate_narrowing(ctx, flag, true);
            if (sym_is_const(ctx, flag)) {
                PyObject *value = sym_get_const(ctx, flag);
                assert(value != NULL);
@@ -3739,6 +3740,7 @@
         case _GUARD_IS_FALSE_POP: {
             JitOptRef flag;
             flag = stack_pointer[-1];
+            sym_apply_predicate_narrowing(ctx, flag, false);
             if (sym_is_const(ctx, flag)) {
                 PyObject *value = sym_get_const(ctx, flag);
                 assert(value != NULL);
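
[Editorial aside, not part of the patch: the guard hooks added above encode one soundness rule — an identity test only proves `subject == constant` on the branch where it holds, i.e. `x is C` taken, or `x is not C` not taken. A tiny sketch of that rule, with illustrative names:]

/* The narrowing rule used by _Py_uop_sym_apply_predicate_narrowing,
 * reduced to a predicate table. Illustrative only. */
#include <stdbool.h>

typedef enum { PRED_IS, PRED_IS_NOT } Kind;

static bool
can_narrow(Kind kind, bool branch_is_true)
{
    /* "x is C" on the true branch, or "x is not C" on the false branch,
     * both prove that x is the constant C. */
    return kind == PRED_IS ? branch_is_true : !branch_is_true;
}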
diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c
index 5f5086d33b5c4c..a9640aaa5072c5 100644
--- a/Python/optimizer_symbols.c
+++ b/Python/optimizer_symbols.c
@@ -25,24 +25,24 @@ state represents no information, and the BOTTOM state represents contradictory
 information. Though symbols logically progress through all intermediate nodes,
 we often skip in-between states for convenience:
 
-   UNKNOWN
-   |   |
-NULL   |
-|      |  <- Anything below this level is an object.
-|      NON_NULL-+
-|      |        |  <- Anything below this level has a known type version.
-|      TYPE_VERSION |
-|      |        |  <- Anything below this level has a known type.
-|      KNOWN_CLASS  |
-|      |  |  |  |   |
-|      |  |  INT* | |
-|      |  |  |  |   |  <- Anything below this level has a known truthiness.
-|      |  |  |  |  TRUTHINESS
-|      |  |  |  |   |
-|      TUPLE |  | | |
-|      |  |  |  |   |  <- Anything below this level is a known constant.
-|      KNOWN_VALUE--+
-|      |  <- Anything below this level is unreachable.
+   UNKNOWN-------------------+
+   |   |                     |
+NULL   |                     |
+|      |                     |  <- Anything below this level is an object.
+|      NON_NULL-+            |
+|      |        |            |  <- Anything below this level has a known type version.
+|      TYPE_VERSION |        |
+|      |        |            |  <- Anything below this level has a known type.
+|      KNOWN_CLASS  |        |
+|      |  |  |  |   |        PREDICATE
+|      |  |  INT* | |        |
+|      |  |  |  |   |        |  <- Anything below this level has a known truthiness.
+|      |  |  |  |  TRUTHINESS|
+|      |  |  |  |   |        |
+|      TUPLE |  | | |        |
+|      |  |  |  |   |        |  <- Anything below this level is a known constant.
+|      KNOWN_VALUE--+----------+
+|      |  <- Anything below this level is unreachable.
 BOTTOM
 
 For example, after guarding that the type of an UNKNOWN local is int, we can
@@ -309,6 +309,7 @@ _Py_uop_sym_set_type(JitOptContext *ctx, JitOptRef ref, PyTypeObject *typ)
             sym->cls.version = 0;
             sym->cls.type = typ;
             return;
+        case JIT_SYM_PREDICATE_TAG:
         case JIT_SYM_TRUTHINESS_TAG:
             if (typ != &PyBool_Type) {
                 sym_set_bottom(ctx, sym);
@@ -370,6 +371,7 @@ _Py_uop_sym_set_type_version(JitOptContext *ctx, JitOptRef ref, unsigned int version)
             sym->tag = JIT_SYM_TYPE_VERSION_TAG;
             sym->version.version = version;
             return true;
+        case JIT_SYM_PREDICATE_TAG:
         case JIT_SYM_TRUTHINESS_TAG:
             if (version != PyBool_Type.tp_version_tag) {
                 sym_set_bottom(ctx, sym);
@@ -436,6 +438,13 @@ _Py_uop_sym_set_const(JitOptContext *ctx, JitOptRef ref, PyObject *const_val)
         case JIT_SYM_UNKNOWN_TAG:
             make_const(sym, const_val);
             return;
+        case JIT_SYM_PREDICATE_TAG:
+            if (!PyBool_Check(const_val)) {
+                sym_set_bottom(ctx, sym);
+                return;
+            }
+            make_const(sym, const_val);
+            return;
         case JIT_SYM_TRUTHINESS_TAG:
             if (!PyBool_Check(const_val) ||
                 (_Py_uop_sym_is_const(ctx, ref) &&
@@ -589,6 +598,7 @@ _Py_uop_sym_get_type(JitOptRef ref)
             return _PyType_LookupByVersion(sym->version.version);
         case JIT_SYM_TUPLE_TAG:
             return &PyTuple_Type;
+        case JIT_SYM_PREDICATE_TAG:
         case JIT_SYM_TRUTHINESS_TAG:
             return &PyBool_Type;
         case JIT_SYM_COMPACT_INT:
@@ -617,6 +627,7 @@ _Py_uop_sym_get_type_version(JitOptRef ref)
             return Py_TYPE(sym->value.value)->tp_version_tag;
         case JIT_SYM_TUPLE_TAG:
             return PyTuple_Type.tp_version_tag;
+        case JIT_SYM_PREDICATE_TAG:
         case JIT_SYM_TRUTHINESS_TAG:
             return PyBool_Type.tp_version_tag;
         case JIT_SYM_COMPACT_INT:
@@ -810,6 +821,7 @@ _Py_uop_sym_set_compact_int(JitOptContext *ctx, JitOptRef ref)
             }
             return;
         case JIT_SYM_TUPLE_TAG:
+        case JIT_SYM_PREDICATE_TAG:
         case JIT_SYM_TRUTHINESS_TAG:
             sym_set_bottom(ctx, sym);
             return;
@@ -823,6 +835,70 @@ _Py_uop_sym_set_compact_int(JitOptContext *ctx, JitOptRef ref)
     }
 }
 
+JitOptRef
+_Py_uop_sym_new_predicate(JitOptContext *ctx, JitOptRef lhs_ref, JitOptRef rhs_ref, JitOptPredicateKind kind)
+{
+    JitOptSymbol *lhs = PyJitRef_Unwrap(lhs_ref);
+    JitOptSymbol *rhs = PyJitRef_Unwrap(rhs_ref);
+
+    JitOptSymbol *res = sym_new(ctx);
+    if (res == NULL) {
+        return out_of_space_ref(ctx);
+    }
+
+    res->tag = JIT_SYM_PREDICATE_TAG;
+    res->predicate.kind = kind;
+    res->predicate.lhs = (uint16_t)(lhs - allocation_base(ctx));
+    res->predicate.rhs = (uint16_t)(rhs - allocation_base(ctx));
+
+    return PyJitRef_Wrap(res);
+}
+
+void
+_Py_uop_sym_apply_predicate_narrowing(JitOptContext *ctx, JitOptRef ref, bool branch_is_true)
+{
+    JitOptSymbol *sym = PyJitRef_Unwrap(ref);
+    if (sym->tag != JIT_SYM_PREDICATE_TAG) {
+        return;
+    }
+
+    JitOptPredicate pred = sym->predicate;
+
+    JitOptRef lhs_ref = PyJitRef_Wrap(allocation_base(ctx) + pred.lhs);
+    JitOptRef rhs_ref = PyJitRef_Wrap(allocation_base(ctx) + pred.rhs);
+
+    bool lhs_is_const = _Py_uop_sym_is_const(ctx, lhs_ref);
+    bool rhs_is_const = _Py_uop_sym_is_const(ctx, rhs_ref);
+    if (!lhs_is_const && !rhs_is_const) {
+        return;
+    }
+
+    bool narrow = false;
+    switch(pred.kind) {
+        case JIT_PRED_IS:
+            narrow = branch_is_true;
+            break;
+        case JIT_PRED_IS_NOT:
+            narrow = !branch_is_true;
+            break;
+        default:
+            return;
+    }
+    if (!narrow) {
+        return;
+    }
+
+    JitOptRef subject_ref = lhs_is_const ? rhs_ref : lhs_ref;
+    JitOptRef const_ref = lhs_is_const ? lhs_ref : rhs_ref;
+
+    PyObject *const_val = _Py_uop_sym_get_const(ctx, const_ref);
+    if (const_val == NULL) {
+        return;
+    }
+    _Py_uop_sym_set_const(ctx, subject_ref, const_val);
+    assert(_Py_uop_sym_is_const(ctx, subject_ref));
+}
+
 JitOptRef
 _Py_uop_sym_new_truthiness(JitOptContext *ctx, JitOptRef ref, bool truthy)
 {
@@ -1159,6 +1235,85 @@ _Py_uop_symbols_test(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored))
     TEST_PREDICATE(_Py_uop_sym_is_const(ctx, value) == true, "value is not constant");
     TEST_PREDICATE(_Py_uop_sym_get_const(ctx, value) == Py_True, "value is not True");
 
+    // Resolving predicate result to True should narrow subject to True
+    JitOptRef subject = _Py_uop_sym_new_unknown(ctx);
+    JitOptRef const_true = _Py_uop_sym_new_const(ctx, Py_True);
+    if (PyJitRef_IsNull(subject) || PyJitRef_IsNull(const_true)) {
+        goto fail;
+    }
+    ref = _Py_uop_sym_new_predicate(ctx, subject, const_true, JIT_PRED_IS);
+    if (PyJitRef_IsNull(ref)) {
+        goto fail;
+    }
+    _Py_uop_sym_apply_predicate_narrowing(ctx, ref, true);
+    TEST_PREDICATE(_Py_uop_sym_is_const(ctx, subject), "predicate narrowing did not const-narrow subject");
+    TEST_PREDICATE(_Py_uop_sym_get_const(ctx, subject) == Py_True, "predicate narrowing did not narrow subject to True");
+
+    // Resolving predicate result to False should not narrow subject
+    subject = _Py_uop_sym_new_unknown(ctx);
+    if (PyJitRef_IsNull(subject)) {
+        goto fail;
+    }
+    ref = _Py_uop_sym_new_predicate(ctx, subject, const_true, JIT_PRED_IS);
+    if (PyJitRef_IsNull(ref)) {
+        goto fail;
+    }
+    _Py_uop_sym_apply_predicate_narrowing(ctx, ref, false);
+    TEST_PREDICATE(!_Py_uop_sym_is_const(ctx, subject), "predicate narrowing incorrectly narrowed subject");
+
+    // Resolving inverted predicate to False should narrow subject to True
+    subject = _Py_uop_sym_new_unknown(ctx);
+    if (PyJitRef_IsNull(subject)) {
+        goto fail;
+    }
+    ref = _Py_uop_sym_new_predicate(ctx, subject, const_true, JIT_PRED_IS_NOT);
+    if (PyJitRef_IsNull(ref)) {
+        goto fail;
+    }
+    _Py_uop_sym_apply_predicate_narrowing(ctx, ref, false);
+    TEST_PREDICATE(_Py_uop_sym_is_const(ctx, subject), "predicate narrowing (inverted) did not const-narrow subject");
+    TEST_PREDICATE(_Py_uop_sym_get_const(ctx, subject) == Py_True, "predicate narrowing (inverted) did not narrow subject to True");
+
+    // Resolving inverted predicate to True should not narrow subject
+    subject = _Py_uop_sym_new_unknown(ctx);
+    if (PyJitRef_IsNull(subject)) {
+        goto fail;
+    }
+    ref = _Py_uop_sym_new_predicate(ctx, subject, const_true, JIT_PRED_IS_NOT);
+    if (PyJitRef_IsNull(ref)) {
+        goto fail;
+    }
+    _Py_uop_sym_apply_predicate_narrowing(ctx, ref, true);
+    TEST_PREDICATE(!_Py_uop_sym_is_const(ctx, subject), "predicate narrowing incorrectly narrowed subject (inverted/true)");
+
+    // Test narrowing subject to None
+    subject = _Py_uop_sym_new_unknown(ctx);
+    JitOptRef const_none = _Py_uop_sym_new_const(ctx, Py_None);
+    if (PyJitRef_IsNull(subject) || PyJitRef_IsNull(const_none)) {
+        goto fail;
+    }
+    ref = _Py_uop_sym_new_predicate(ctx, subject, const_none, JIT_PRED_IS);
+    if (PyJitRef_IsNull(ref)) {
+        goto fail;
+    }
+    _Py_uop_sym_apply_predicate_narrowing(ctx, ref, true);
+    TEST_PREDICATE(_Py_uop_sym_is_const(ctx, subject), "predicate narrowing did not const-narrow subject (None)");
+    TEST_PREDICATE(_Py_uop_sym_get_const(ctx, subject) == Py_None, "predicate narrowing did not narrow subject to None");
+
+    // Test narrowing subject to numerical constant
+    subject = _Py_uop_sym_new_unknown(ctx);
+    PyObject *one_obj = PyLong_FromLong(1);
+    JitOptRef const_one = _Py_uop_sym_new_const(ctx, one_obj);
+    if (PyJitRef_IsNull(subject) || PyJitRef_IsNull(const_one)) {
+        goto fail;
+    }
+    ref = _Py_uop_sym_new_predicate(ctx, subject, const_one, JIT_PRED_IS);
+    if (PyJitRef_IsNull(ref)) {
+        goto fail;
+    }
+    _Py_uop_sym_apply_predicate_narrowing(ctx, ref, true);
+    TEST_PREDICATE(_Py_uop_sym_is_const(ctx, subject), "predicate narrowing did not const-narrow subject (1)");
+    TEST_PREDICATE(_Py_uop_sym_get_const(ctx, subject) == one_obj, "predicate narrowing did not narrow subject to 1");
+
     val_big = PyNumber_Lshift(_PyLong_GetOne(), PyLong_FromLong(66));
     if (val_big == NULL) {
diff --git a/Python/pystate.c b/Python/pystate.c
index 89374e16722494..19f1245d60a2f8 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -24,7 +24,6 @@
 #include "pycore_stackref.h"      // Py_STACKREF_DEBUG
 #include "pycore_stats.h"         // FT_STAT_WORLD_STOP_INC()
 #include "pycore_time.h"          // _PyTime_Init()
-#include "pycore_uop.h"           // UOP_BUFFER_SIZE
 #include "pycore_uniqueid.h"      // _PyObject_FinalizePerThreadRefcounts()
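
[Editorial aside, not part of the patch: taken together, the hunks above run each trace through three regions of the doubled `uop_array` — the translator fills the lower half, the optimizer writes into the upper half, and `stack_allocate` compacts back into the lower half. A hedged summary sketch of that layout; names and sizes are illustrative:]

/* Summary sketch of the buffer dance implied by the hunks above. */
#define MAX_LEN 2500  /* stands in for UOP_MAX_TRACE_LENGTH */

typedef struct { int opcode; } Uop;

typedef struct {
    Uop uop_array[2 * MAX_LEN];  /* two halves, as in _PyJitTracerState */
} Tracer;

static void pipeline(Tracer *t)
{
    Uop *code  = &t->uop_array[0];        /* 1. translator output (lower half) */
    Uop *opt   = &t->uop_array[MAX_LEN];  /* 2. optimizer output (upper half) */
    Uop *final = &t->uop_array[0];        /* 3. stack_allocate target (lower half) */
    (void)code; (void)opt; (void)final;   /* each stage reads the previous one */
}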