Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-128939: Refactor JIT optimize structs #128940

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 87 additions & 50 deletions Include/internal/pycore_optimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,15 +148,6 @@ extern PyTypeObject _PyDefaultOptimizer_Type;
extern PyTypeObject _PyUOpExecutor_Type;
extern PyTypeObject _PyUOpOptimizer_Type;

/* Symbols */
/* See explanation in optimizer_symbols.c */

struct _Py_UopsSymbol {
int flags; // 0 bits: Top; 2 or more bits: Bottom
PyTypeObject *typ; // Borrowed reference
PyObject *const_val; // Owned reference (!)
unsigned int type_version; // currently stores type version
};

#define UOP_FORMAT_TARGET 0
#define UOP_FORMAT_JUMP 1
Expand Down Expand Up @@ -193,27 +184,72 @@ static inline uint16_t uop_get_error_target(const _PyUOpInstruction *inst)
// handle before rejoining the rest of the program.
#define MAX_CHAIN_DEPTH 4

typedef struct _Py_UopsSymbol _Py_UopsSymbol;
/* Symbols */
/* See explanation in optimizer_symbols.c */


/* Tag values for JIT optimizer symbols.
 *
 * NOTE(review): presumably these are the values stored in the `tag`
 * member of the JitOpt* structs / JitOptSymbol union declared in this
 * header -- confirm against optimizer_symbols.c.
 *
 * The value 1 is not assigned to any enumerator; the numbering jumps
 * from JIT_SYM_UNKNOWN_TAG (0) straight to JIT_SYM_NULL_TAG (2).
 */
typedef enum _JitSymType {
JIT_SYM_UNKNOWN_TAG = 0,

JIT_SYM_NULL_TAG = 2,
JIT_SYM_NON_NULL_TAG = 3,
JIT_SYM_BOTTOM_TAG = 4,
JIT_SYM_TYPE_VERSION_TAG = 5,
JIT_SYM_KNOWN_CLASS_TAG = 6,
JIT_SYM_KNOWN_VALUE_TAG = 7,
JIT_SYM_TUPLE_TAG = 8,
} JitSymType;

/* Symbol payload holding a type pointer together with a 32-bit version.
 *
 * `tag` is the first member, matching the other JitOpt* payload structs.
 * NOTE(review): presumably tag == JIT_SYM_KNOWN_CLASS_TAG for this
 * variant -- confirm.
 * NOTE(review): ownership of `type` is not stated here; the previous
 * _Py_UopsSymbol documented its type pointer as a borrowed reference --
 * confirm the same holds.
 */
typedef struct _jit_opt_known_class {
uint8_t tag;
uint32_t version;
PyTypeObject *type;
} JitOptKnownClass;
Comment on lines +203 to +207
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could probably just get the version by doing cls.type->tp_version_tag, right? Having both here just creates opportunities for them to be stale or out-of-sync, I think.


/* Symbol payload holding only a 32-bit version, with no type pointer.
 *
 * `tag` is the first member, matching the other JitOpt* payload structs.
 * NOTE(review): presumably tag == JIT_SYM_TYPE_VERSION_TAG and `version`
 * is a type version tag -- confirm.
 */
typedef struct _jit_opt_known_version {
uint8_t tag;
uint32_t version;
} JitOptKnownVersion;

/* Symbol payload holding a pointer to a concrete Python object.
 *
 * `tag` is the first member, matching the other JitOpt* payload structs.
 * NOTE(review): presumably tag == JIT_SYM_KNOWN_VALUE_TAG -- confirm.
 * NOTE(review): the previous _Py_UopsSymbol documented its constant as an
 * owned reference; whether `value` is owned is not visible here -- confirm.
 */
typedef struct _jit_opt_known_value {
uint8_t tag;
PyObject *value;
} JitOptKnownValue;

typedef struct _jit_opt_tuple {
uint8_t tag;
uint8_t length;
uint16_t items[6];
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we have room for 7, right?

Suggested change
uint16_t items[6];
uint16_t items[7];

} JitOptTuple;

/* A JIT optimizer symbol: a union over every payload variant.
 *
 * Each struct member (cls, value, version, tuple) begins with a
 * `uint8_t tag`, so the bare `tag` member overlays that leading byte of
 * whichever variant is stored and can be read to tell the variants apart.
 */
typedef union _jit_opt_symbol {
uint8_t tag;
JitOptKnownClass cls;
JitOptKnownValue value;
JitOptKnownVersion version;
JitOptTuple tuple;
} JitOptSymbol;
Comment on lines +225 to +231
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this be:

Suggested change
typedef union _jit_opt_symbol {
uint8_t tag;
JitOptKnownClass cls;
JitOptKnownValue value;
JitOptKnownVersion version;
JitOptTuple tuple;
} JitOptSymbol;
typedef struct {
uint8_t tag;
union {
JitOptKnownClass cls;
JitOptKnownValue value;
JitOptKnownVersion version;
JitOptTuple tuple;
};
} JitOptSymbol;

Then we wouldn't need to repeat the tag in each struct.

Or does the current scheme potentially pack better?



struct _Py_UOpsAbstractFrame {
// Max stacklen
int stack_len;
int locals_len;

_Py_UopsSymbol **stack_pointer;
_Py_UopsSymbol **stack;
_Py_UopsSymbol **locals;
JitOptSymbol **stack_pointer;
JitOptSymbol **stack;
JitOptSymbol **locals;
};

typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame;

typedef struct ty_arena {
int ty_curr_number;
int ty_max_number;
_Py_UopsSymbol arena[TY_ARENA_SIZE];
JitOptSymbol arena[TY_ARENA_SIZE];
} ty_arena;

struct _Py_UOpsContext {
typedef struct _JitOptContext {
char done;
char out_of_space;
bool contradiction;
Expand All @@ -225,46 +261,47 @@ struct _Py_UOpsContext {
// Arena for the symbolic types.
ty_arena t_arena;

_Py_UopsSymbol **n_consumed;
_Py_UopsSymbol **limit;
_Py_UopsSymbol *locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];
};

typedef struct _Py_UOpsContext _Py_UOpsContext;

extern bool _Py_uop_sym_is_null(_Py_UopsSymbol *sym);
extern bool _Py_uop_sym_is_not_null(_Py_UopsSymbol *sym);
extern bool _Py_uop_sym_is_const(_Py_UopsSymbol *sym);
extern PyObject *_Py_uop_sym_get_const(_Py_UopsSymbol *sym);
extern _Py_UopsSymbol *_Py_uop_sym_new_unknown(_Py_UOpsContext *ctx);
extern _Py_UopsSymbol *_Py_uop_sym_new_not_null(_Py_UOpsContext *ctx);
extern _Py_UopsSymbol *_Py_uop_sym_new_type(
_Py_UOpsContext *ctx, PyTypeObject *typ);
extern _Py_UopsSymbol *_Py_uop_sym_new_const(_Py_UOpsContext *ctx, PyObject *const_val);
extern _Py_UopsSymbol *_Py_uop_sym_new_null(_Py_UOpsContext *ctx);
extern bool _Py_uop_sym_has_type(_Py_UopsSymbol *sym);
extern bool _Py_uop_sym_matches_type(_Py_UopsSymbol *sym, PyTypeObject *typ);
extern bool _Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, unsigned int version);
extern void _Py_uop_sym_set_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym);
extern void _Py_uop_sym_set_non_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym);
extern void _Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyTypeObject *typ);
extern bool _Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, unsigned int version);
extern void _Py_uop_sym_set_const(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyObject *const_val);
extern bool _Py_uop_sym_is_bottom(_Py_UopsSymbol *sym);
extern int _Py_uop_sym_truthiness(_Py_UopsSymbol *sym);
extern PyTypeObject *_Py_uop_sym_get_type(_Py_UopsSymbol *sym);


extern void _Py_uop_abstractcontext_init(_Py_UOpsContext *ctx);
extern void _Py_uop_abstractcontext_fini(_Py_UOpsContext *ctx);
JitOptSymbol **n_consumed;
JitOptSymbol **limit;
JitOptSymbol *locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];
} JitOptContext;

extern bool _Py_uop_sym_is_null(JitOptSymbol *sym);
extern bool _Py_uop_sym_is_not_null(JitOptSymbol *sym);
extern bool _Py_uop_sym_is_const(JitOptSymbol *sym);
extern PyObject *_Py_uop_sym_get_const(JitOptSymbol *sym);
extern JitOptSymbol *_Py_uop_sym_new_unknown(JitOptContext *ctx);
extern JitOptSymbol *_Py_uop_sym_new_not_null(JitOptContext *ctx);
extern JitOptSymbol *_Py_uop_sym_new_type(
JitOptContext *ctx, PyTypeObject *typ);
extern JitOptSymbol *_Py_uop_sym_new_const(JitOptContext *ctx, PyObject *const_val);
extern JitOptSymbol *_Py_uop_sym_new_null(JitOptContext *ctx);
extern bool _Py_uop_sym_has_type(JitOptSymbol *sym);
extern bool _Py_uop_sym_matches_type(JitOptSymbol *sym, PyTypeObject *typ);
extern bool _Py_uop_sym_matches_type_version(JitOptSymbol *sym, unsigned int version);
extern void _Py_uop_sym_set_null(JitOptContext *ctx, JitOptSymbol *sym);
extern void _Py_uop_sym_set_non_null(JitOptContext *ctx, JitOptSymbol *sym);
extern void _Py_uop_sym_set_type(JitOptContext *ctx, JitOptSymbol *sym, PyTypeObject *typ);
extern bool _Py_uop_sym_set_type_version(JitOptContext *ctx, JitOptSymbol *sym, unsigned int version);
extern void _Py_uop_sym_set_const(JitOptContext *ctx, JitOptSymbol *sym, PyObject *const_val);
extern bool _Py_uop_sym_is_bottom(JitOptSymbol *sym);
extern int _Py_uop_sym_truthiness(JitOptSymbol *sym);
extern PyTypeObject *_Py_uop_sym_get_type(JitOptSymbol *sym);

extern JitOptSymbol *_Py_uop_sym_new_tuple(JitOptContext *ctx, int size, JitOptSymbol **args);
extern JitOptSymbol *_Py_uop_sym_tuple_getitem(JitOptContext *ctx, JitOptSymbol *sym, int item);
extern int _Py_uop_sym_tuple_length(JitOptSymbol *sym);

extern void _Py_uop_abstractcontext_init(JitOptContext *ctx);
extern void _Py_uop_abstractcontext_fini(JitOptContext *ctx);

extern _Py_UOpsAbstractFrame *_Py_uop_frame_new(
_Py_UOpsContext *ctx,
JitOptContext *ctx,
PyCodeObject *co,
int curr_stackentries,
_Py_UopsSymbol **args,
JitOptSymbol **args,
int arg_len);
extern int _Py_uop_frame_pop(_Py_UOpsContext *ctx);
extern int _Py_uop_frame_pop(JitOptContext *ctx);

PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored);

Expand Down
19 changes: 19 additions & 0 deletions Lib/test/test_capi/test_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -1465,6 +1465,25 @@ def f(l: complex, r: complex) -> None:
with self.subTest(l=l, r=r, x=x, y=y):
script_helper.assert_python_ok("-c", s)

# Packs two int constants into a tuple, unpacks them again and adds the
# results, then inspects the uop names of the executor produced for
# testfunc.
def test_symbols_flow_through_tuples(self):
def testfunc(n):
for _ in range(n):
a = 1
b = 2
t = a, b
x, y = t
r = x + y
return r

res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
# 1 + 2, and an executor must have been created.
self.assertEqual(res, 3)
self.assertIsNotNone(ex)
uops = get_opnames(ex)
# The int-specialized add must be present while none of the int guard
# uops remain.
self.assertIn("_BINARY_OP_ADD_INT", uops)
self.assertNotIn("_GUARD_BOTH_INT", uops)
self.assertNotIn("_GUARD_NOS_INT", uops)
self.assertNotIn("_GUARD_TOS_INT", uops)

def test_decref_escapes(self):
class Convert9999ToNone:
def __del__(self):
Expand Down
10 changes: 5 additions & 5 deletions Lib/test/test_generated_cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -1842,16 +1842,16 @@ def test_overridden_abstract_args(self):
"""
output = """
case OP: {
_Py_UopsSymbol *arg1;
_Py_UopsSymbol *out;
JitOptSymbol *arg1;
JitOptSymbol *out;
arg1 = stack_pointer[-1];
out = EGGS(arg1);
stack_pointer[-1] = out;
break;
}

case OP2: {
_Py_UopsSymbol *out;
JitOptSymbol *out;
out = sym_new_not_null(ctx);
stack_pointer[-1] = out;
break;
Expand All @@ -1876,14 +1876,14 @@ def test_no_overridden_case(self):
"""
output = """
case OP: {
_Py_UopsSymbol *out;
JitOptSymbol *out;
out = sym_new_not_null(ctx);
stack_pointer[-1] = out;
break;
}

case OP2: {
_Py_UopsSymbol *out;
JitOptSymbol *out;
out = NULL;
stack_pointer[-1] = out;
break;
Expand Down
2 changes: 1 addition & 1 deletion Modules/_testinternalcapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
#include "pycore_interp.h" // _PyInterpreterState_GetConfigCopy()
#include "pycore_long.h" // _PyLong_Sign()
#include "pycore_object.h" // _PyObject_IsFreed()
#include "pycore_optimizer.h" // _Py_UopsSymbol, etc.
#include "pycore_optimizer.h" // JitOptSymbol, etc.
#include "pycore_pathconfig.h" // _PyPathConfig_ClearGlobal()
#include "pycore_pyerrors.h" // _PyErr_ChainExceptions1()
#include "pycore_pylifecycle.h" // _PyInterpreterConfig_AsDict()
Expand Down
15 changes: 9 additions & 6 deletions Python/optimizer_analysis.c
Original file line number Diff line number Diff line change
Expand Up @@ -368,13 +368,16 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
#define sym_truthiness _Py_uop_sym_truthiness
#define frame_new _Py_uop_frame_new
#define frame_pop _Py_uop_frame_pop
#define sym_new_tuple _Py_uop_sym_new_tuple
#define sym_tuple_getitem _Py_uop_sym_tuple_getitem
#define sym_tuple_length _Py_uop_sym_tuple_length

static int
optimize_to_bool(
_PyUOpInstruction *this_instr,
_Py_UOpsContext *ctx,
_Py_UopsSymbol *value,
_Py_UopsSymbol **result_ptr)
JitOptContext *ctx,
JitOptSymbol *value,
JitOptSymbol **result_ptr)
{
if (sym_matches_type(value, &PyBool_Type)) {
REPLACE_OP(this_instr, _NOP, 0, 0);
Expand Down Expand Up @@ -460,8 +463,8 @@ optimize_uops(
)
{

_Py_UOpsContext context;
_Py_UOpsContext *ctx = &context;
JitOptContext context;
JitOptContext *ctx = &context;
uint32_t opcode = UINT16_MAX;
int curr_space = 0;
int max_space = 0;
Expand All @@ -486,7 +489,7 @@ optimize_uops(

int oparg = this_instr->oparg;
opcode = this_instr->opcode;
_Py_UopsSymbol **stack_pointer = ctx->frame->stack_pointer;
JitOptSymbol **stack_pointer = ctx->frame->stack_pointer;

#ifdef Py_DEBUG
if (get_lltrace() >= 3) {
Expand Down
49 changes: 33 additions & 16 deletions Python/optimizer_bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@

#define op(name, ...) /* NAME is ignored */

typedef struct _Py_UopsSymbol _Py_UopsSymbol;
typedef struct _Py_UOpsContext _Py_UOpsContext;
typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame;

/* Shortened forms for convenience */
Expand All @@ -32,13 +30,16 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame;
#define sym_is_bottom _Py_uop_sym_is_bottom
#define frame_new _Py_uop_frame_new
#define frame_pop _Py_uop_frame_pop
#define sym_new_tuple _Py_uop_sym_new_tuple
#define sym_tuple_getitem _Py_uop_sym_tuple_getitem
#define sym_tuple_length _Py_uop_sym_tuple_length

extern int
optimize_to_bool(
_PyUOpInstruction *this_instr,
_Py_UOpsContext *ctx,
_Py_UopsSymbol *value,
_Py_UopsSymbol **result_ptr);
JitOptContext *ctx,
JitOptSymbol *value,
JitOptSymbol **result_ptr);

extern void
eliminate_pop_guard(_PyUOpInstruction *this_instr, bool exit);
Expand All @@ -50,17 +51,17 @@ dummy_func(void) {

PyCodeObject *co;
int oparg;
_Py_UopsSymbol *flag;
_Py_UopsSymbol *left;
_Py_UopsSymbol *right;
_Py_UopsSymbol *value;
_Py_UopsSymbol *res;
_Py_UopsSymbol *iter;
_Py_UopsSymbol *top;
_Py_UopsSymbol *bottom;
JitOptSymbol *flag;
JitOptSymbol *left;
JitOptSymbol *right;
JitOptSymbol *value;
JitOptSymbol *res;
JitOptSymbol *iter;
JitOptSymbol *top;
JitOptSymbol *bottom;
_Py_UOpsAbstractFrame *frame;
_Py_UOpsAbstractFrame *new_frame;
_Py_UOpsContext *ctx;
JitOptContext *ctx;
_PyUOpInstruction *this_instr;
_PyBloomFilter *dependencies;
int modified;
Expand All @@ -85,7 +86,7 @@ dummy_func(void) {

op(_LOAD_FAST_AND_CLEAR, (-- value)) {
value = GETLOCAL(oparg);
_Py_UopsSymbol *temp = sym_new_null(ctx);
JitOptSymbol *temp = sym_new_null(ctx);
GETLOCAL(oparg) = temp;
}

Expand Down Expand Up @@ -365,7 +366,7 @@ dummy_func(void) {
}

op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right -- )) {
_Py_UopsSymbol *res;
JitOptSymbol *res;
if (sym_is_const(left) && sym_is_const(right) &&
sym_matches_type(left, &PyUnicode_Type) && sym_matches_type(right, &PyUnicode_Type)) {
PyObject *temp = PyUnicode_Concat(sym_get_const(left), sym_get_const(right));
Expand Down Expand Up @@ -949,6 +950,22 @@ dummy_func(void) {
res = sym_new_const(ctx, Py_True);
}

// Abstract rule for _BUILD_TUPLE: the `oparg` input symbols in `values`
// are handed to sym_new_tuple() and the symbol it returns becomes `tup`.
op(_BUILD_TUPLE, (values[oparg] -- tup)) {
tup = sym_new_tuple(ctx, oparg, values);
}

// Abstract rule for _UNPACK_SEQUENCE_TWO_TUPLE: val0 receives item 0 and
// val1 receives item 1 of `seq`, each obtained via sym_tuple_getitem().
// Note the output order in the stack effect is (val1, val0).
op(_UNPACK_SEQUENCE_TWO_TUPLE, (seq -- val1, val0)) {
val0 = sym_tuple_getitem(ctx, seq, 0);
val1 = sym_tuple_getitem(ctx, seq, 1);
}

// Abstract rule for _UNPACK_SEQUENCE_TUPLE: for every i in [0, oparg),
// values[i] is set to the result of sym_tuple_getitem(ctx, seq, i).
op(_UNPACK_SEQUENCE_TUPLE, (seq -- values[oparg])) {
for (int i = 0; i < oparg; i++) {
values[i] = sym_tuple_getitem(ctx, seq, i);
}
}


// END BYTECODES //

}
Loading
Loading