Skip to content

Commit 2476010

Browse files
habermancopybara-github
authored andcommitted
Removed fasttable specialization for sub-message size.
This will (temporarily) reduce the performance of sub-message parsing, but with the intention of reclaiming it later through runtime branches (like we do for strings). I was leaning in this direction already, for several reasons: 1. The old code broke the encapsulation of `upb_Arena`. Going forward we'd like all arena manipulation to go through formally-defined interfaces in `arena.h`. 2. Specializing the table on sub-message size creates a tight coupling between the generated code for a message and its sub-message. If the sub-message changed size and the containing message was not regenerated, we could get buffer overflows. While we generally do not support ABI stability across generated code versions, this would be an especially painful issue to debug if it ever did occur. 3. Specializing field parsers on message size majorly increases the number of fast parser functions, increasing code size overhead. This change should noticeably shrink the code size overhead of switching on fasttable parsing. This CL shrinks the overhead of fasttable by ~36Ki: ``` $ bloaty --domain=vm blaze-bin/third_party/upb/upb/wire/decode_fast/libfield_parsers.a -- /google/src/cloud/haberman/clean/google3/blaze-bin/third_party/upb/upb/wire/decode_fast/libfield_parsers.a VM SIZE -------------- -19.0% -128 .rodata.cst16 -27.9% -4.09Ki .eh_frame -31.8% -31.3Ki .text -31.3% -35.6Ki TOTAL ``` PiperOrigin-RevId: 756439207
1 parent 01a7429 commit 2476010

File tree

3 files changed

+33
-85
lines changed

3 files changed

+33
-85
lines changed

upb/wire/decode_fast/combinations.h

Lines changed: 4 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,8 @@
1818
//
1919
// These can be generated in any combination, except that non-primitive types
2020
// cannot be packed. So we have:
21-
// {1bt,2bt} x {s,o,r,p} x {b,v32,v64,z32,z64,f32,f64,ce} // Primitive types.
22-
// {1bt,2bt} x {s,o,r} x {s,b} // String types.
23-
// {1bt,2bt} x {s,o,r} x {m64,m128,m192,m256,mmax} // Message types.
21+
// {1bt,2bt} x {s,o,r,p} x {b,v32,v64,z32,z64,f32,f64,ce} // Primitive
22+
// {1bt,2bt} x {s,o,r} x {s,b,m} // Non-primitive
2423

2524
#define UPB_DECODEFAST_CARDINALITIES(F, ...) \
2625
F(__VA_ARGS__, Scalar) \
@@ -39,11 +38,7 @@
3938
F(__VA_ARGS__, ClosedEnum) \
4039
F(__VA_ARGS__, String) \
4140
F(__VA_ARGS__, Bytes) \
42-
F(__VA_ARGS__, Message64) \
43-
F(__VA_ARGS__, Message128) \
44-
F(__VA_ARGS__, Message192) \
45-
F(__VA_ARGS__, Message256) \
46-
F(__VA_ARGS__, MessageBig)
41+
F(__VA_ARGS__, Message)
4742

4843
#define UPB_DECODEFAST_TAGSIZES(F, ...) \
4944
F(__VA_ARGS__, Tag1Byte) \
@@ -101,11 +96,7 @@ UPB_INLINE int upb_DecodeFast_ValueBytes(upb_DecodeFast_Type type) {
10196
case kUpb_DecodeFast_Varint64:
10297
case kUpb_DecodeFast_ZigZag64:
10398
case kUpb_DecodeFast_Fixed64:
104-
case kUpb_DecodeFast_Message64:
105-
case kUpb_DecodeFast_Message128:
106-
case kUpb_DecodeFast_Message192:
107-
case kUpb_DecodeFast_Message256:
108-
case kUpb_DecodeFast_MessageBig:
99+
case kUpb_DecodeFast_Message:
109100
return 8;
110101
case kUpb_DecodeFast_String:
111102
case kUpb_DecodeFast_Bytes:
@@ -125,21 +116,6 @@ UPB_INLINE bool upb_DecodeFast_IsZigZag(upb_DecodeFast_Type type) {
125116
}
126117
}
127118

128-
UPB_INLINE int upb_DecodeFast_MessageCeilingBytes(upb_DecodeFast_Type type) {
129-
switch (type) {
130-
case kUpb_DecodeFast_Message64:
131-
return 64;
132-
case kUpb_DecodeFast_Message128:
133-
return 128;
134-
case kUpb_DecodeFast_Message192:
135-
return 192;
136-
case kUpb_DecodeFast_Message256:
137-
return 256;
138-
default:
139-
return 0;
140-
}
141-
}
142-
143119
// The canonical ordering of functions, used by the arrays of function pointers
144120
// and function names. This macro generates the full cross product of the
145121
// cardinality, type, and tag size enums.

upb/wire/decode_fast/field_message.c

Lines changed: 6 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -24,22 +24,11 @@
2424
#include "upb/port/def.inc"
2525

2626
UPB_INLINE
27-
upb_Message* decode_newmsg_ceil(upb_Decoder* d, const upb_MiniTable* m,
28-
upb_DecodeFast_Type type) {
27+
upb_Message* decode_newmsg(upb_Decoder* d, const upb_MiniTable* m) {
2928
size_t size = m->UPB_PRIVATE(size);
30-
char* msg_data;
31-
size_t msg_ceil_bytes = upb_DecodeFast_MessageCeilingBytes(type);
32-
if (UPB_LIKELY(type != kUpb_DecodeFast_MessageBig &&
33-
UPB_PRIVATE(_upb_ArenaHas)(&d->arena) >= msg_ceil_bytes)) {
34-
UPB_ASSERT(size <= (size_t)msg_ceil_bytes);
35-
msg_data = d->arena.UPB_PRIVATE(ptr);
36-
// TODO: fix for Xsan
37-
d->arena.UPB_PRIVATE(ptr) += size;
38-
memset(msg_data, 0, msg_ceil_bytes);
39-
} else {
40-
msg_data = (char*)upb_Arena_Malloc(&d->arena, size);
41-
memset(msg_data, 0, size);
42-
}
29+
// OPT: specialize for message size
30+
char* msg_data = (char*)upb_Arena_Malloc(&d->arena, size);
31+
memset(msg_data, 0, size);
4332
return (upb_Message*)msg_data;
4433
}
4534

@@ -99,7 +88,7 @@ const char* fastdecode_tosubmsg(upb_EpsCopyInputStream* e, const char* ptr,
9988
submsg.msg = *dst; \
10089
\
10190
if (card == kUpb_DecodeFast_Repeated || UPB_LIKELY(!submsg.msg)) { \
102-
*dst = submsg.msg = decode_newmsg_ceil(d, subtablep, type); \
91+
*dst = submsg.msg = decode_newmsg(d, subtablep); \
10392
} \
10493
\
10594
ptr += tagbytes; \
@@ -136,11 +125,7 @@ const char* fastdecode_tosubmsg(upb_EpsCopyInputStream* e, const char* ptr,
136125
kUpb_DecodeFast_##tagsize); \
137126
}
138127

139-
UPB_DECODEFAST_CARDINALITIES(UPB_DECODEFAST_TAGSIZES, F, Message64)
140-
UPB_DECODEFAST_CARDINALITIES(UPB_DECODEFAST_TAGSIZES, F, Message128)
141-
UPB_DECODEFAST_CARDINALITIES(UPB_DECODEFAST_TAGSIZES, F, Message192)
142-
UPB_DECODEFAST_CARDINALITIES(UPB_DECODEFAST_TAGSIZES, F, Message256)
143-
UPB_DECODEFAST_CARDINALITIES(UPB_DECODEFAST_TAGSIZES, F, MessageBig)
128+
UPB_DECODEFAST_CARDINALITIES(UPB_DECODEFAST_TAGSIZES, F, Message)
144129

145130
#undef F
146131
#undef FASTDECODE_SUBMSG

upb/wire/decode_fast/select.c

Lines changed: 23 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -103,44 +103,31 @@ static bool upb_DecodeFast_GetFieldType(const upb_MiniTable* m,
103103

104104
if (upb_MiniTableField_IsClosedEnum(field)) {
105105
*out_type = kUpb_DecodeFast_ClosedEnum;
106-
} else if (type == kUpb_FieldType_Message) {
107-
const upb_MiniTable* subm = upb_MiniTable_GetSubMessageTable(m, field);
108-
size_t size = subm->UPB_PRIVATE(size);
109-
if (size <= 64) {
110-
*out_type = kUpb_DecodeFast_Message64;
111-
} else if (size <= 128) {
112-
*out_type = kUpb_DecodeFast_Message128;
113-
} else if (size <= 192) {
114-
*out_type = kUpb_DecodeFast_Message192;
115-
} else if (size <= 256) {
116-
*out_type = kUpb_DecodeFast_Message256;
117-
} else {
118-
*out_type = kUpb_DecodeFast_MessageBig;
119-
}
120-
} else {
121-
static const int8_t types[] = {
122-
[kUpb_FieldType_Bool] = kUpb_DecodeFast_Bool,
123-
[kUpb_FieldType_Enum] = kUpb_DecodeFast_Varint32,
124-
[kUpb_FieldType_Int32] = kUpb_DecodeFast_Varint32,
125-
[kUpb_FieldType_UInt32] = kUpb_DecodeFast_Varint32,
126-
[kUpb_FieldType_Int64] = kUpb_DecodeFast_Varint64,
127-
[kUpb_FieldType_UInt64] = kUpb_DecodeFast_Varint64,
128-
[kUpb_FieldType_Fixed32] = kUpb_DecodeFast_Fixed32,
129-
[kUpb_FieldType_SFixed32] = kUpb_DecodeFast_Fixed32,
130-
[kUpb_FieldType_Float] = kUpb_DecodeFast_Fixed32,
131-
[kUpb_FieldType_Fixed64] = kUpb_DecodeFast_Fixed64,
132-
[kUpb_FieldType_SFixed64] = kUpb_DecodeFast_Fixed64,
133-
[kUpb_FieldType_Double] = kUpb_DecodeFast_Fixed64,
134-
[kUpb_FieldType_SInt32] = kUpb_DecodeFast_ZigZag32,
135-
[kUpb_FieldType_SInt64] = kUpb_DecodeFast_ZigZag64,
136-
[kUpb_FieldType_String] = kUpb_DecodeFast_String,
137-
[kUpb_FieldType_Bytes] = kUpb_DecodeFast_Bytes,
138-
};
139-
140-
UPB_ASSERT(type < UPB_ARRAY_SIZE(types));
141-
*out_type = types[type];
106+
return true;
142107
}
143108

109+
static const int8_t types[] = {
110+
[kUpb_FieldType_Bool] = kUpb_DecodeFast_Bool,
111+
[kUpb_FieldType_Enum] = kUpb_DecodeFast_Varint32,
112+
[kUpb_FieldType_Int32] = kUpb_DecodeFast_Varint32,
113+
[kUpb_FieldType_UInt32] = kUpb_DecodeFast_Varint32,
114+
[kUpb_FieldType_Int64] = kUpb_DecodeFast_Varint64,
115+
[kUpb_FieldType_UInt64] = kUpb_DecodeFast_Varint64,
116+
[kUpb_FieldType_Fixed32] = kUpb_DecodeFast_Fixed32,
117+
[kUpb_FieldType_SFixed32] = kUpb_DecodeFast_Fixed32,
118+
[kUpb_FieldType_Float] = kUpb_DecodeFast_Fixed32,
119+
[kUpb_FieldType_Fixed64] = kUpb_DecodeFast_Fixed64,
120+
[kUpb_FieldType_SFixed64] = kUpb_DecodeFast_Fixed64,
121+
[kUpb_FieldType_Double] = kUpb_DecodeFast_Fixed64,
122+
[kUpb_FieldType_SInt32] = kUpb_DecodeFast_ZigZag32,
123+
[kUpb_FieldType_SInt64] = kUpb_DecodeFast_ZigZag64,
124+
[kUpb_FieldType_String] = kUpb_DecodeFast_String,
125+
[kUpb_FieldType_Bytes] = kUpb_DecodeFast_Bytes,
126+
[kUpb_FieldType_Message] = kUpb_DecodeFast_Message,
127+
};
128+
129+
UPB_ASSERT(type < UPB_ARRAY_SIZE(types));
130+
*out_type = types[type];
144131
return true;
145132
}
146133

0 commit comments

Comments
 (0)