Skip to content

Commit 0027d9f

Browse files
vef: add stateful TypeDecoder
TypeDecoder is an object that allocates its buffers and VDF structs once per expression and reuses them across rows. Following MySQL's val_str(String *buf) convention, decode() takes a caller-provided String *out; rather than copying the decoded bytes into it, decode() sets out to point at the decoder's scratch buffer — zero copies on the hot path. The data out points to is valid only until the next decode() call, which is sufficient for the protocol send path. val_external_str on both Field and Item now fills the buffer argument and returns it, consistent with val_str. One-shot decodes outside the row-reading path (e.g. Item_string::print()) use DecodeStringUncached. For the mem_root selection on Field: regular tables (NO_TMP_TABLE) use TABLE::mem_root, giving the decoder the same lifetime as its Field clone and avoiding orphaned allocations on the share. All temp table variants use TABLE_SHARE::mem_root because close_tmp_table() asserts that TABLE::mem_root is empty — this covers both INTERNAL_TMP_TABLE and derived/CTE tables, which are initialized as INTERNAL_TMP_TABLE but overwritten to NON_TRANSACTIONAL_TMP_TABLE by sql_derived.cc.
1 parent afb1a66 commit 0027d9f

14 files changed

Lines changed: 392 additions & 150 deletions

File tree

sql/dd/dd_table.cc

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -358,8 +358,11 @@ static void prepare_default_value_string(uchar *buf, TABLE *table,
358358
// For BINARY(0) and VARBINARY type with empty string as default value.
359359
f->val_str(&type);
360360
} else {
361-
// VillageSQL: val_external_str handles both custom and regular types
362-
f->val_external_str(&type);
361+
// VillageSQL: val_external_str handles both custom and regular types.
362+
// Copy if val_str returned a different pointer than the buffer we passed.
363+
if (const String *res = f->val_external_str(&type);
364+
res != nullptr && res != &type)
365+
type.copy(*res);
363366
}
364367

365368
if (type.length()) {

sql/field.cc

Lines changed: 15 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10619,31 +10619,27 @@ const char *get_field_name_or_expression(THD *thd, const Field *field) {
1061910619
String *Field::val_external_str(String *buf) const {
1062010620
if (!has_type_context()) return val_str(buf);
1062110621

10622-
// Get the encoded data directly from the field
10623-
const uchar *encoded_data = data_ptr();
10624-
size_t encoded_length = data_length();
10625-
1062610622
bool is_valid = true;
10627-
if (villagesql::DecodeString(*get_type_context(), encoded_data,
10628-
encoded_length, *current_thd->mem_root, buf,
10629-
is_valid) &&
10630-
!is_valid) {
10631-
THD *thd = current_thd;
10632-
if (!thd->lex->is_ignore() && thd->is_strict_mode()) {
10633-
const ErrConvString errmsg(pointer_cast<const char *>(encoded_data),
10634-
encoded_length, &my_charset_bin);
10635-
const Diagnostics_area *da = thd->get_stmt_da();
10636-
push_warning_printf(thd, Sql_condition::SL_WARNING,
10637-
ER_TRUNCATED_WRONG_VALUE_FOR_FIELD,
10638-
ER_THD(thd, ER_TRUNCATED_WRONG_VALUE_FOR_FIELD),
10639-
get_type_context()->type_name().c_str(), errmsg.ptr(),
10640-
this->field_name, da->current_row_for_condition());
10623+
if (!villagesql::DecodeStringForField(this, buf, is_valid)) {
10624+
if (!is_valid) {
10625+
THD *thd = current_thd;
10626+
if (!thd->lex->is_ignore() && thd->is_strict_mode()) {
10627+
const uchar *encoded_data = data_ptr();
10628+
size_t encoded_length = data_length();
10629+
const ErrConvString errmsg(pointer_cast<const char *>(encoded_data),
10630+
encoded_length, &my_charset_bin);
10631+
const Diagnostics_area *da = thd->get_stmt_da();
10632+
push_warning_printf(
10633+
thd, Sql_condition::SL_WARNING, ER_TRUNCATED_WRONG_VALUE_FOR_FIELD,
10634+
ER_THD(thd, ER_TRUNCATED_WRONG_VALUE_FOR_FIELD),
10635+
get_type_context()->type_name().c_str(), errmsg.ptr(),
10636+
this->field_name, da->current_row_for_condition());
10637+
}
1064110638
}
1064210639
// OOMs will just return nullptr, but have called my_error.
1064310640
return nullptr;
1064410641
}
1064510642

10646-
// Success: the decoded string is in buf.
1064710643
return buf;
1064810644
}
1064910645

sql/field.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
#include "villagesql/schema/descriptor/type_context.h"
6565

6666
namespace villagesql {
67+
class TypeDecoder;
6768
class TypeEncoder;
6869
} // namespace villagesql
6970

@@ -1891,6 +1892,8 @@ class Field {
18911892
private:
18921893
const villagesql::TypeContext *custom_type{nullptr};
18931894
villagesql::TypeEncoder *type_encoder_{nullptr};
1895+
// mutable: lazily initialised by the const val_external_str() path.
1896+
mutable villagesql::TypeDecoder *type_decoder_{nullptr};
18941897

18951898
public:
18961899
const villagesql::TypeContext *get_type_context() const {
@@ -1903,6 +1906,11 @@ class Field {
19031906
void set_type_encoder(villagesql::TypeEncoder *encoder) {
19041907
type_encoder_ = encoder;
19051908
}
1909+
1910+
villagesql::TypeDecoder *get_type_decoder() const { return type_decoder_; }
1911+
void set_type_decoder(villagesql::TypeDecoder *decoder) const {
1912+
type_decoder_ = decoder;
1913+
}
19061914
};
19071915

19081916
/**

sql/item.cc

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -290,17 +290,8 @@ String *Item::val_external_str(String *str) {
290290
String *binary_data = val_str(str);
291291
if (!has_type_context() || null_value) return binary_data;
292292

293-
// Decode using TypeContext's to_string function
294293
bool is_valid = true;
295-
if (villagesql::DecodeString(
296-
*get_type_context(), pointer_cast<const uchar *>(binary_data->ptr()),
297-
binary_data->length(), *current_thd->mem_root, str, is_valid)) {
298-
if (!is_valid) {
299-
// Invalid custom type data - set null and return nullptr
300-
null_value = true;
301-
return nullptr;
302-
}
303-
// OOM or other error (my_error already called)
294+
if (!villagesql::DecodeStringForItem(this, *binary_data, str, is_valid)) {
304295
null_value = true;
305296
return nullptr;
306297
}
@@ -3631,12 +3622,11 @@ void Item_string::print(const THD *, String *str,
36313622
// Custom types: decode binary representation to string format.
36323623
if (has_type_context()) {
36333624
str->append('\'');
3634-
String decoded;
36353625
bool is_valid;
3636-
if (!villagesql::DecodeString(
3637-
*get_type_context(), (const uchar *)str_value.ptr(),
3638-
str_value.length(), *current_thd->mem_root, &decoded, is_valid)) {
3639-
str->append(decoded);
3626+
String decoded_buf;
3627+
if (villagesql::DecodeStringUncached(get_type_context(), str_value,
3628+
&decoded_buf, is_valid)) {
3629+
str->append(decoded_buf);
36403630
}
36413631
str->append('\'');
36423632
return;

sql/item.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@
8080
#include "villagesql/include/item_helpers.h"
8181

8282
namespace villagesql {
83+
class TypeDecoder;
8384
class TypeEncoder;
8485
} // namespace villagesql
8586

@@ -1284,6 +1285,7 @@ class Item : public Parse_tree_node {
12841285
virtual void cleanup() {
12851286
marker = MARKER_NONE;
12861287
type_encoder_ = nullptr;
1288+
type_decoder_ = nullptr;
12871289
}
12881290
/**
12891291
Called when an item has been removed, can be used to notify external
@@ -3756,6 +3758,7 @@ class Item : public Parse_tree_node {
37563758
protected:
37573759
const villagesql::TypeContext *custom_type{nullptr};
37583760
villagesql::TypeEncoder *type_encoder_{nullptr};
3761+
villagesql::TypeDecoder *type_decoder_{nullptr};
37593762

37603763
public:
37613764
virtual const villagesql::TypeContext *get_type_context() const {
@@ -3767,6 +3770,10 @@ class Item : public Parse_tree_node {
37673770
void set_type_encoder(villagesql::TypeEncoder *encoder) {
37683771
type_encoder_ = encoder;
37693772
}
3773+
villagesql::TypeDecoder *get_type_decoder() const { return type_decoder_; }
3774+
void set_type_decoder(villagesql::TypeDecoder *decoder) {
3775+
type_decoder_ = decoder;
3776+
}
37703777
};
37713778

37723779
/**

sql/key.cc

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -323,8 +323,11 @@ void field_unpack(String *to, Field *field, uint max_length, bool prefix_key) {
323323
return;
324324
}
325325
const CHARSET_INFO *cs = field->charset();
326-
// VillageSQL: val_external_str handles both custom and regular types
327-
field->val_external_str(&tmp);
326+
// VillageSQL: val_external_str handles both custom and regular types.
327+
// val_str() may return a String other than the buffer we passed; copy it then.
328+
if (const String *res = field->val_external_str(&tmp);
329+
res != nullptr && res != &tmp)
330+
tmp.copy(*res);
328331
/*
329332
For BINARY(N) strip trailing zeroes to make
330333
the error message nice-looking

sql/sql_show.cc

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1710,8 +1710,11 @@ static bool print_default_clause(THD *thd, Field *field, String *def_value,
17101710
type.length(length + 1);
17111711
quoted = false;
17121712
} else {
1713-
// VillageSQL: val_external_str handles both custom and regular types
1714-
field->val_external_str(&type);
1713+
// VillageSQL: val_external_str handles both custom and regular types.
1714+
// val_str() may return a String other than the buffer we passed; copy it then.
1715+
if (const String *res = field->val_external_str(&type);
1716+
res != nullptr && res != &type)
1717+
type.copy(*res);
17151718
}
17161719

17171720
if (type.length()) {

villagesql/types/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
# along with this program; if not, see <https://www.gnu.org/licenses/>.
1515

1616
SET(VILLAGESQL_TYPES_SOURCES
17+
type_decoder.cc
1718
type_encoder.cc
1819
type_op.cc
1920
util.cc

villagesql/types/type_decoder.cc

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
/* Copyright (c) 2026 VillageSQL Contributors
2+
*
3+
* This program is free software; you can redistribute it and/or
4+
* modify it under the terms of the GNU General Public License
5+
* as published by the Free Software Foundation; either version 2
6+
* of the License, or (at your option) any later version.
7+
*
8+
* This program is distributed in the hope that it will be useful,
9+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
10+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11+
* GNU General Public License for more details.
12+
*
13+
* You should have received a copy of the GNU General Public License
14+
* along with this program; if not, see <https://www.gnu.org/licenses/>.
15+
*/
16+
17+
#include "villagesql/types/type_decoder.h"
18+
19+
#include <cassert>
20+
#include <cstring>
21+
22+
#include "my_alloc.h"
23+
#include "my_base.h"
24+
#include "my_inttypes.h"
25+
#include "mysqld_error.h"
26+
#include "sql_string.h"
27+
#include "template_utils.h"
28+
#include "villagesql/include/error.h"
29+
#include "villagesql/schema/descriptor/type_descriptor.h"
30+
#include "villagesql/types/type_op.h"
31+
32+
namespace villagesql {
33+
34+
// Prepares a decoder for the custom type described by `tc`.
//
// Only the call state that stays the same from row to row is set up here.
// The scratch buffer is not allocated by the constructor; Init() does that.
//
// tc:       type context supplying the decode operation and the maximum
//           decode buffer length. Must not be null.
// mem_root: MEM_ROOT that Init() and decode() allocate from.
TypeDecoder::TypeDecoder(const TypeContext *tc, MEM_ROOT &mem_root)
    : mem_root_(&mem_root),
      // Check tc inside the initializer so the assert fires before the first
      // dereference of tc instead of after it.
      buffer_size_((assert(tc != nullptr),
                    static_cast<size_t>(tc->max_decode_buffer_length()))) {
  assert(buffer_size_ > 0);

  const DecodeOp &op = tc->descriptor()->decode_op();
  if (op.vdf() != nullptr) {
    // VDF-based decoder: fill in the argument/result structs once so that
    // decode() only has to patch the per-row fields.
    vdf_ = op.vdf();
    // decode() calls only vdf_->vdf(); prerun/postrun hooks are never run.
    assert(vdf_->prerun == nullptr && vdf_->postrun == nullptr);
    ctx_.protocol = VEF_PROTOCOL_2;

    // Single, never-NULL input of the custom type.
    input_[0].type = VEF_TYPE_CUSTOM;
    input_[0].is_null = false;
    vdf_args_.user_data = nullptr;
    vdf_args_.value_count = 1;
    vdf_args_.values = input_;

    // Result wiring; str_buf is filled in by Init() once the scratch buffer
    // exists.
    vdf_result_.error_msg = error_msg_;
    vdf_result_.max_str_len = buffer_size_;
    vdf_result_.alt_str_buf = &alt_str_buf_;
  } else {
    // Plain function-pointer decoder.
    fn_ = op.fn();
  }
}
57+
58+
bool TypeDecoder::Init() {
59+
buffer_ = new (mem_root_) char[buffer_size_];
60+
if (!buffer_) {
61+
my_error(ER_OUTOFMEMORY, MYF(ME_FATALERROR), buffer_size_);
62+
return false;
63+
}
64+
if (vdf_ != nullptr) {
65+
vdf_result_.str_buf = buffer_;
66+
}
67+
return true;
68+
}
69+
70+
bool TypeDecoder::decode(const uchar *data, size_t len, String *out,
71+
bool &is_valid) {
72+
is_valid = true;
73+
74+
if (vdf_ != nullptr) {
75+
input_[0].bin_len = len;
76+
input_[0].bin_value = data;
77+
vdf_result_.type = VEF_RESULT_VALUE;
78+
vdf_result_.actual_len = 0;
79+
alt_str_buf_ = nullptr;
80+
81+
vdf_->vdf(&ctx_, &vdf_args_, &vdf_result_);
82+
83+
if (vdf_result_.type != VEF_RESULT_VALUE) {
84+
is_valid = false;
85+
return false;
86+
}
87+
88+
const size_t actual_len = vdf_result_.actual_len;
89+
90+
if (alt_str_buf_ != nullptr) {
91+
// VDF used its own buffer (output exceeded buffer_size_). Grow
92+
// overflow_buf_ if needed and reuse it across rows.
93+
if (actual_len > overflow_buf_size_) {
94+
auto *new_buf = new (mem_root_) char[actual_len];
95+
if (should_assert_if_null(new_buf)) {
96+
my_error(ER_OUTOFMEMORY, MYF(ME_FATALERROR), actual_len);
97+
return false;
98+
}
99+
overflow_buf_ = new_buf;
100+
overflow_buf_size_ = actual_len;
101+
}
102+
if (actual_len > 0) memcpy(overflow_buf_, alt_str_buf_, actual_len);
103+
out->set(overflow_buf_, actual_len, &my_charset_utf8mb4_bin);
104+
return true;
105+
}
106+
107+
// TODO(villagesql-beta): report an error or warning when the VDF overruns
108+
// the buffer rather than silently returning invalid.
109+
if (should_assert_if_false(actual_len <= buffer_size_)) {
110+
is_valid = false;
111+
return false;
112+
}
113+
out->set(buffer_, actual_len, &my_charset_utf8mb4_bin);
114+
} else {
115+
assert(fn_ != nullptr);
116+
size_t decoded_length = 0;
117+
if (fn_(data, len, buffer_, buffer_size_, &decoded_length)) {
118+
is_valid = false;
119+
return false;
120+
}
121+
// TODO(villagesql-beta): report an error or warning when the fn_ overruns
122+
// the buffer rather than silently returning invalid.
123+
if (should_assert_if_false(decoded_length <= buffer_size_)) {
124+
is_valid = false;
125+
return false;
126+
}
127+
out->set(buffer_, decoded_length, &my_charset_utf8mb4_bin);
128+
}
129+
130+
return true;
131+
}
132+
133+
} // namespace villagesql

0 commit comments

Comments
 (0)