Skip to content

Commit e3780a9

Browse files
vef: add stateful encoder to Field/Item (#66)
TypeEncoder is a per-Field or per-Item object that holds encoding state across rows. Previously, every encode call allocated a new output buffer from mem_root; now a single scratch buffer (sized to persisted_length) is allocated once and reused for the lifetime of the Field's TABLE or the Item's query execution. For VDF-based encode ops, the VDF call structures (vef_invalue_t, vef_vdf_args_t, vef_vdf_result_t) are pre-filled at construction time, so only the per-row fields (input pointer/length, result type) are touched on each call. TypeEncoder is lazily created via GetTypeEncoderFor and cached on the Field or Item. Item::cleanup() nulls the pointer between prepared-statement re-executions so a fresh encoder is allocated on the new thd->mem_root during the next execution. Temporary table encoders are tied to the TABLE_SHARE::mem_root of the temporary table. The public encode API in util.h is simplified: callers no longer pass MEM_ROOT, field_name, or TypeContext — the encoder resolves these from the Field or Item itself.
1 parent 53fd68f commit e3780a9

17 files changed

Lines changed: 485 additions & 159 deletions
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
CREATE TABLE t1 (id INT, val COMPLEX);
2+
INSERT INTO t1 VALUES (1,'(1.0,2.0)'),(2,'(3.0,4.0)'),(3,'(5.0,6.0)'),(4,'(1.0,2.0)');
3+
PREPARE stmt FROM 'SELECT id FROM t1 WHERE val = ? ORDER BY id';
4+
SET @p = '(1,2)';
5+
EXECUTE stmt USING @p;
6+
id
7+
1
8+
4
9+
SET @p = '(3,4)';
10+
EXECUTE stmt USING @p;
11+
id
12+
2
13+
SET @p = '(5,6)';
14+
EXECUTE stmt USING @p;
15+
id
16+
3
17+
DEALLOCATE PREPARE stmt;
18+
DROP TABLE t1;
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Test that the TypeEncoder on Item_param is correctly reset between PS
2+
# re-executions. Item_param persists across executions on the PS mem_root,
3+
# while thd->mem_root (where the TypeEncoder is allocated) is cleared between
4+
# executions. Item::cleanup() nulls type_encoder_ at execution end so the
5+
# next execution lazily re-allocates on the fresh thd->mem_root. Without this
6+
# reset, re-execution would use a dangling pointer and produce wrong results.
7+
8+
--source include/villagesql/install_complex_extension.inc
9+
10+
CREATE TABLE t1 (id INT, val COMPLEX);
11+
INSERT INTO t1 VALUES (1,'(1.0,2.0)'),(2,'(3.0,4.0)'),(3,'(5.0,6.0)'),(4,'(1.0,2.0)');
12+
13+
# Execute the same prepared statement three times with different parameter
14+
# values. Each execution must encode the new string correctly using a fresh
15+
# TypeEncoder, not a stale pointer from a prior execution.
16+
PREPARE stmt FROM 'SELECT id FROM t1 WHERE val = ? ORDER BY id';
17+
18+
SET @p = '(1,2)';
19+
EXECUTE stmt USING @p;
20+
21+
SET @p = '(3,4)';
22+
EXECUTE stmt USING @p;
23+
24+
SET @p = '(5,6)';
25+
EXECUTE stmt USING @p;
26+
27+
DEALLOCATE PREPARE stmt;
28+
29+
DROP TABLE t1;
30+
31+
--source include/villagesql/uninstall_complex_extension.inc
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
CREATE TABLE t1 (id INT, val COMPLEX);
2+
INSERT INTO t1 VALUES (1,'(1.0,2.0)'),(2,'(3.0,4.0)'),(3,'(5.0,6.0)'),(4,'(1.0,2.0)');
3+
SELECT t_outer.id, t_outer.val
4+
FROM t1 AS t_outer
5+
WHERE t_outer.val = (SELECT t_inner.val FROM t1 AS t_inner
6+
WHERE t_inner.id = t_outer.id LIMIT 1);
7+
id val
8+
1 (1,2)
9+
2 (3,4)
10+
3 (5,6)
11+
4 (1,2)
12+
SELECT t_outer.id, t_outer.val
13+
FROM t1 AS t_outer
14+
WHERE t_outer.val IN (SELECT t_inner.val FROM t1 AS t_inner
15+
WHERE t_inner.id != t_outer.id);
16+
id val
17+
1 (1,2)
18+
4 (1,2)
19+
DROP TABLE t1;
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
CREATE TABLE t1 (val vsql_complex.COMPLEX);
2+
INSERT INTO t1 VALUES ('(1.0, 2.0)'), ('(3.0, 4.0)');
3+
# String literal unioned with COMPLEX column: encoder runs on tmp table field
4+
SELECT '(5.0, 6.0)' AS val UNION SELECT val FROM t1 ORDER BY val;
5+
val
6+
(1,2)
7+
(3,4)
8+
(5,6)
9+
DROP TABLE t1;
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Test correlated subqueries where outer and inner scans reference the same
2+
# table. MySQL must open two TABLE instances from the same TABLE_SHARE, giving
3+
# each its own cloned Field objects and independent TypeEncoder/buffer. If
4+
# the same TABLE instance were incorrectly reused for both scans, buffer
5+
# aliasing would corrupt encoded values and produce wrong comparison results.
6+
7+
--source include/villagesql/install_complex_extension.inc
8+
9+
CREATE TABLE t1 (id INT, val COMPLEX);
10+
INSERT INTO t1 VALUES (1,'(1.0,2.0)'),(2,'(3.0,4.0)'),(3,'(5.0,6.0)'),(4,'(1.0,2.0)');
11+
12+
# Scalar correlated subquery on the same table.
13+
# Inner scan encodes val for each row with matching id; outer scan compares.
14+
# Expected: all 4 rows (outer val always equals inner val for same id).
15+
# If the outer and inner scans shared a buffer, the inner scan would overwrite
16+
# the outer val before the comparison, producing wrong results.
17+
SELECT t_outer.id, t_outer.val
18+
FROM t1 AS t_outer
19+
WHERE t_outer.val = (SELECT t_inner.val FROM t1 AS t_inner
20+
WHERE t_inner.id = t_outer.id LIMIT 1);
21+
22+
# IN correlated subquery on the same table.
23+
# Expected: rows 1 and 4 only (both have val=(1,2); each has a peer with a
24+
# different id that also has val=(1,2)).
25+
# If buffer aliasing occurred, cross-row comparisons would be wrong.
26+
SELECT t_outer.id, t_outer.val
27+
FROM t1 AS t_outer
28+
WHERE t_outer.val IN (SELECT t_inner.val FROM t1 AS t_inner
29+
WHERE t_inner.id != t_outer.id);
30+
31+
DROP TABLE t1;
32+
33+
--source include/villagesql/uninstall_complex_extension.inc
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Regression test: encoding a string literal into a UNION result tmp table's
2+
# COMPLEX field must not allocate the TypeEncoder on TABLE::mem_root.
3+
#
4+
# A UNION where one arm is a string literal and the other is a COMPLEX column
5+
# forces GetTypeEncoderFor() to run on the UNION result tmp table's Field.
6+
# With the bug, close_tmp_table() asserts table->mem_root.allocated_size() == 0
7+
# in debug builds because the encoder was allocated there.
8+
9+
--source include/villagesql/install_complex_extension.inc
10+
11+
CREATE TABLE t1 (val vsql_complex.COMPLEX);
12+
INSERT INTO t1 VALUES ('(1.0, 2.0)'), ('(3.0, 4.0)');
13+
14+
--echo # String literal unioned with COMPLEX column: encoder runs on tmp table field
15+
SELECT '(5.0, 6.0)' AS val UNION SELECT val FROM t1 ORDER BY val;
16+
17+
DROP TABLE t1;
18+
19+
--source include/villagesql/uninstall_complex_extension.inc

sql/field.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@
6363
#include "template_utils.h"
6464
#include "villagesql/schema/descriptor/type_context.h"
6565

66+
namespace villagesql {
67+
class TypeEncoder;
68+
} // namespace villagesql
69+
6670
class Create_field;
6771
class CostOfItem;
6872
class Field;
@@ -1885,14 +1889,21 @@ class Field {
18851889
const uchar *unpack_int64(uchar *to, const uchar *from) const;
18861890

18871891
private:
1892+
// TODO(villagesql): Collapse these into one object (here and in Item)
18881893
const villagesql::TypeContext *custom_type{nullptr};
1894+
villagesql::TypeEncoder *type_encoder_{nullptr};
18891895

18901896
public:
18911897
const villagesql::TypeContext *get_type_context() const {
18921898
return custom_type;
18931899
}
18941900
void set_type_context(const villagesql::TypeContext *tc) { custom_type = tc; }
18951901
bool has_type_context() const { return nullptr != custom_type; }
1902+
1903+
// Returns the TypeEncoder pointer currently stored on this Field. The member
// is initialised to nullptr, so this returns nullptr until
// set_type_encoder() has stored an encoder.
villagesql::TypeEncoder *get_type_encoder() const { return type_encoder_; }
1904+
// Stores `encoder` as this Field's TypeEncoder pointer. Only the pointer is
// recorded; this setter neither allocates nor frees the encoder.
void set_type_encoder(villagesql::TypeEncoder *encoder) {
1905+
type_encoder_ = encoder;
1906+
}
18961907
};
18971908

18981909
/**

sql/item.cc

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4331,8 +4331,7 @@ type_conversion_status Item_param::save_in_field_inner(Field *field,
43314331
return TYPE_OK;
43324332
case MYSQL_TYPE_VARCHAR: {
43334333
if (field->has_type_context()) {
4334-
return villagesql::EncodeAndStoreStringToCustomField(
4335-
*field->get_type_context(), str_value, field);
4334+
return villagesql::EncodeAndStoreStringToCustomField(str_value, field);
43364335
}
43374336
return field->store(str_value.ptr(), str_value.length(),
43384337
str_value.charset());
@@ -4492,8 +4491,8 @@ String *Item_param::val_str(String *str) {
44924491
switch (data_type_actual()) {
44934492
case MYSQL_TYPE_VARCHAR: {
44944493
if (has_type_context()) {
4495-
return villagesql::EncodeStringForCustomParam(
4496-
*get_type_context(), str_value_ptr, item_name.ptr(), null_value);
4494+
return villagesql::EncodeStringForCustomParam(this, str_value_ptr,
4495+
null_value);
44974496
}
44984497
return &str_value_ptr;
44994498
}
@@ -7062,9 +7061,8 @@ type_conversion_status Item::save_in_field_inner(Field *field,
70627061
// For custom types, encode the string value before storing
70637062
if (!has_type_context() && field->has_type_context()) {
70647063
bool is_valid = false;
7065-
String *encoded = villagesql::EncodeStringForField(
7066-
*field->get_type_context(), *result, *current_thd->mem_root,
7067-
field->field_name, is_valid);
7064+
String *encoded =
7065+
villagesql::EncodeStringForField(field, *result, is_valid);
70687066
if (encoded == nullptr) {
70697067
str_value.set_quick(nullptr, 0, cs);
70707068
return is_valid ? TYPE_ERR_OOM : TYPE_ERR_BAD_VALUE;
@@ -7113,8 +7111,8 @@ type_conversion_status Item_string::save_in_field_inner(Field *field, bool) {
71137111

71147112
// Encode str_value into a new representation, based on the type.
71157113
bool is_oom = false;
7116-
String *encoded = villagesql::EncodeStringForField(
7117-
*tc, str_value, *current_thd->mem_root, field->field_name, is_oom);
7114+
String *encoded =
7115+
villagesql::EncodeStringForField(field, str_value, is_oom);
71187116
if (encoded == nullptr) {
71197117
return is_oom ? TYPE_ERR_OOM : TYPE_ERR_BAD_VALUE;
71207118
}

sql/item.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,10 @@
7979
#include "template_utils.h"
8080
#include "villagesql/include/item_helpers.h"
8181

82+
namespace villagesql {
83+
class TypeEncoder;
84+
} // namespace villagesql
85+
8286
class Item;
8387
class Item_field;
8488
class Item_func;
@@ -1277,7 +1281,10 @@ class Item : public Parse_tree_node {
12771281
Prepare for new execution by clearing cached values.
12781282
Do not remove values allocated during preparation, destructor handles this.
12791283
*/
1280-
virtual void cleanup() { marker = MARKER_NONE; }
1284+
virtual void cleanup() {
1285+
marker = MARKER_NONE;
1286+
// Forget the cached TypeEncoder rather than freeing it: per the commit
// description it is allocated on thd->mem_root, which does not survive
// between prepared-statement executions. Nulling the pointer here makes
// the next execution create a fresh encoder instead of reusing a
// dangling one.
type_encoder_ = nullptr;
1287+
}
12811288
/**
12821289
Called when an item has been removed, can be used to notify external
12831290
objects about the removal, e.g subquery predicates that are part of
@@ -3748,13 +3755,18 @@ class Item : public Parse_tree_node {
37483755

37493756
protected:
37503757
const villagesql::TypeContext *custom_type{nullptr};
3758+
villagesql::TypeEncoder *type_encoder_{nullptr};
37513759

37523760
public:
37533761
virtual const villagesql::TypeContext *get_type_context() const {
37543762
return custom_type;
37553763
}
37563764
void set_type_context(const villagesql::TypeContext *tc) { custom_type = tc; }
37573765
virtual bool has_type_context() const { return nullptr != custom_type; }
3766+
// Returns the TypeEncoder pointer currently stored on this Item. The member
// is initialised to nullptr, so this returns nullptr when no encoder has
// been stored.
villagesql::TypeEncoder *get_type_encoder() const { return type_encoder_; }
3767+
// Stores `encoder` as this Item's TypeEncoder pointer. Only the pointer is
// recorded; this setter neither allocates nor frees the encoder.
void set_type_encoder(villagesql::TypeEncoder *encoder) {
3768+
type_encoder_ = encoder;
3769+
}
37583770
};
37593771

37603772
/**

sql/sql_load.cc

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1482,9 +1482,8 @@ bool Sql_cmd_load_table::read_sep_field(THD *thd, COPY_INFO &info,
14821482
if (field->has_type_context()) {
14831483
String input_str((char *)pos, length, read_info.read_charset);
14841484
bool is_valid = false;
1485-
String *encoded = villagesql::EncodeStringForField(
1486-
*field->get_type_context(), input_str, *thd->mem_root,
1487-
field->field_name, is_valid);
1485+
String *encoded =
1486+
villagesql::EncodeStringForField(field, input_str, is_valid);
14881487
if (encoded == nullptr) {
14891488
if (is_valid) return true; // OOM case
14901489
// Encoding failed - in strict mode, fail; otherwise skip row
@@ -1733,9 +1732,8 @@ bool Sql_cmd_load_table::read_xml_field(THD *thd, COPY_INFO &info,
17331732
if (field->has_type_context()) {
17341733
String input_str(tag->value.ptr(), tag->value.length(), cs);
17351734
bool is_valid = false;
1736-
String *encoded = villagesql::EncodeStringForField(
1737-
*field->get_type_context(), input_str, *thd->mem_root,
1738-
field->field_name, is_valid);
1735+
String *encoded =
1736+
villagesql::EncodeStringForField(field, input_str, is_valid);
17391737
if (encoded == nullptr) {
17401738
if (is_valid) return true; // OOM case
17411739
// Encoding failed - in strict mode, fail; otherwise skip row

0 commit comments

Comments
 (0)