Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,43 @@ CGP_ALWAYS_INLINE BOOL CGPJavaTypeIsEnum(CGPFieldJavaType type) {
return type == ComGoogleProtobufDescriptors_FieldDescriptor_JavaType_Enum_ENUM;
}

// Gets the integer value of an enum.
//
// This function is updated to support preservation of unknown enum values in proto3.
//
// How it works:
// - Valid enums are pointers to static singleton objects. Since objects are aligned
// to at least 8 bytes on 64-bit systems, the lowest bit of a valid pointer is always 0.
// - Unrecognized enum values are stored by shifting the raw 32-bit value into the
// upper 32 bits and setting the lowest bit to 1 (tagging it).
// Representation: (raw_value << 32) | 0x1
//
// Why this is safe:
// 1. ARC (Automatic Reference Counting): The J2ObjC runtime does not retain/release
// enum fields (see CGPIsRetainedType in Descriptors.m returning NO for enums).
// Therefore, storing a non-pointer value in the id field will not cause ARC to
// attempt to dereference it and crash.
// 2. PAC (Pointer Authentication Codes): PAC uses the upper bits of pointers on
// ARM64e to store a signature. We do not touch the upper bits of VALID pointers.
// We only use the upper bits when the value is NOT a pointer (indicated by the
// lowest bit being 1). Since we don't try to authenticate this tagged value as a
// pointer, PAC is not triggered.
//
// Why this is correct:
// - A stored value is classified solely by its lowest bit: 0 means a pointer to a
// static enum singleton, 1 means a tagged unrecognized value. Valid 64-bit pointers
// may have non-zero upper 32 bits, so the upper bits are never used to tell the two
// apart.
// - In proto3, the 0 value is always defined (required by spec). Thus, an unknown
// value can never be 0; even if it were, the tag bit alone would still mark it as
// an unrecognized value.
CGP_ALWAYS_INLINE jint CGPEnumGetIntValue(CGPEnumDescriptor *descriptor, id enumObj) {
  uintptr_t stored_val = (uintptr_t)(ARCBRIDGE void *)enumObj;

  // A set lowest bit marks an unrecognized value stored as (raw_value << 32) | 0x1.
  if (stored_val & 0x1) {
    // Widen to 64 bits before shifting: shifting a uintptr_t by 32 is undefined
    // on targets where pointers are only 32 bits wide. The uint32_t step makes
    // the narrowing back to the signed raw value explicit.
    return (jint)(uint32_t)((uint64_t)stored_val >> 32);
  }

  // Otherwise enumObj is a pointer to an enum singleton; read the jint located
  // valueOffset_ bytes into that object.
  return *(jint *)((char *)(ARCBRIDGE void *)enumObj + descriptor->valueOffset_);
}

Expand Down
95 changes: 91 additions & 4 deletions protobuf/runtime/src/com/google/protobuf/GeneratedMessage.mm
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,44 @@ static void SingularSetRetainable(id msg, TYPE_Retainable value, size_t offset,

#undef REPEATED_GETTER_IMP

// Getter for singular enum fields. Intercepts tagged pointers for unknown values
// and returns the UNRECOGNIZED singleton instead.
static IMP GetSingularGetterImpEnum(size_t offset, CGPHasLocator hasLoc, id defaultValue,
                                    CGPFieldDescriptor *field) {
  // The enum object of the last entry in values_ is what callers receive in
  // place of a tagged (unrecognized) stored value.
  CGPEnumDescriptor *enumDescriptor = [field getEnumType];
  CGPEnumValueDescriptor *lastValue =
      (CGPEnumValueDescriptor *)
          enumDescriptor->values_->buffer_[enumDescriptor->values_->size_ - 1];
  id unrecognized = lastValue->enum_;

  return imp_implementationWithBlock(^id(id msg) {
    // Field not set: hand back the default supplied by the caller.
    if (!GetHas(msg, hasLoc)) {
      return defaultValue;
    }

    id stored = *FIELD_PTR(id, msg, offset);
    // A set lowest bit means the slot holds a tagged raw value, not an object.
    BOOL isTagged = ((uintptr_t)(__bridge void *)stored & 0x1) != 0;
    return isTagged ? unrecognized : stored;
  });
}

// Getter for repeated enum fields. Intercepts tagged pointers for unknown values
// and returns the UNRECOGNIZED singleton instead.
static IMP GetRepeatedGetterImpEnum(size_t offset, CGPFieldDescriptor *field) {
  // The enum object of the last entry in values_ is what callers receive in
  // place of a tagged (unrecognized) stored element.
  CGPEnumDescriptor *enumDescriptor = [field getEnumType];
  CGPEnumValueDescriptor *lastValue =
      (CGPEnumValueDescriptor *)
          enumDescriptor->values_->buffer_[enumDescriptor->values_->size_ - 1];
  id unrecognized = lastValue->enum_;

  return imp_implementationWithBlock(^id(id msg, jint idx) {
    id element = CGPRepeatedFieldGetId(REPEATED_FIELD_PTR(msg, offset), idx);
    // A set lowest bit means the element is a tagged raw value, not an object.
    BOOL isTagged = ((uintptr_t)(__bridge void *)element & 0x1) != 0;
    return isTagged ? unrecognized : element;
  });
}

static BOOL AddGetterMethod(Class cls, SEL sel, CGPFieldDescriptor *field) {
BOOL repeated = CGPFieldIsRepeated(field);
IMP imp = NULL;
Expand All @@ -388,7 +426,30 @@ static BOOL AddGetterMethod(Class cls, SEL sel, CGPFieldDescriptor *field) {
strcpy(encoding, @encode(TYPE_##NAME)); \
break;

SWITCH_TYPES_NO_ENUM(CGPFieldGetJavaType(field), ADD_GETTER_METHOD_CASE)
// We expand the switch manually instead of using SWITCH_TYPES_NO_ENUM
// to handle ENUM fields specially and avoid RETAIN_AND_AUTORELEASE on tagged pointers.
switch (CGPFieldGetJavaType(field)) {
case ComGoogleProtobufDescriptors_FieldDescriptor_JavaType_Enum_INT:
ADD_GETTER_METHOD_CASE(Int)
case ComGoogleProtobufDescriptors_FieldDescriptor_JavaType_Enum_LONG:
ADD_GETTER_METHOD_CASE(Long)
case ComGoogleProtobufDescriptors_FieldDescriptor_JavaType_Enum_FLOAT:
ADD_GETTER_METHOD_CASE(Float)
case ComGoogleProtobufDescriptors_FieldDescriptor_JavaType_Enum_DOUBLE:
ADD_GETTER_METHOD_CASE(Double)
case ComGoogleProtobufDescriptors_FieldDescriptor_JavaType_Enum_BOOLEAN:
ADD_GETTER_METHOD_CASE(Bool)
case ComGoogleProtobufDescriptors_FieldDescriptor_JavaType_Enum_STRING:
case ComGoogleProtobufDescriptors_FieldDescriptor_JavaType_Enum_BYTE_STRING:
case ComGoogleProtobufDescriptors_FieldDescriptor_JavaType_Enum_MESSAGE:
ADD_GETTER_METHOD_CASE(Id)
case ComGoogleProtobufDescriptors_FieldDescriptor_JavaType_Enum_ENUM:
imp = repeated ? GetRepeatedGetterImpEnum(offset, field)
: GetSingularGetterImpEnum(offset, hasLoc, field->data_->defaultValue.valueId,
field);
strcpy(encoding, @encode(id));
break;
}

#undef ADD_GETTER_METHOD_CASE

Expand Down Expand Up @@ -1579,11 +1640,37 @@ static inline BOOL ReadEnumValueDescriptor(CGPCodedInputStream *input, CGPEnumDe
return YES;
}

// Reads an enum value from the stream and resolves it to a Java enum instance.
//
// This function preserves unknown enum values in proto3 (open enums)
// by storing them as tagged integers instead of falling back to the UNRECOGNIZED
// singleton.
//
// See CGPEnumGetIntValue in Descriptors_PackagePrivate.h for details on the
// tagged representation and why it is safe with ARC and PAC.
static BOOL ReadEnumJavaValue(CGPCodedInputStream *input, CGPEnumDescriptor *enumType,
                              id *javaValue) {
  // Read the raw wire value exactly once; everything below works from this
  // integer and must not consume further input.
  jint value;
  if (!CGPReadEnum(input, &value)) return NO;

  CGPEnumValueDescriptor *valueDescriptor = CGPEnumValueDescriptorFromInt(enumType, value);

  if (valueDescriptor == nil) {
    // Closed enum (proto2) and value was not found. We store nil.
    *javaValue = nil;
    return YES;
  }

  // For an open enum (proto3), a lookup miss yields the UNRECOGNIZED descriptor,
  // which is the last element in values_.
  BOOL isUnrecognized =
      !enumType->is_closed_ &&
      valueDescriptor == enumType->values_->buffer_[enumType->values_->size_ - 1];

  // The tagged form needs 32 bits for the raw value plus the tag bit, so it is
  // only used when a pointer is at least 64 bits wide. Narrower targets keep the
  // UNRECOGNIZED singleton instead of performing an undefined 32-bit shift.
  if (isUnrecognized && sizeof(uintptr_t) >= sizeof(uint64_t)) {
    // Store the raw value in the upper 32 bits and set the lowest bit to 1.
    // Built in 64-bit unsigned arithmetic so negative values and the shift are
    // well defined.
    uint64_t tagged = ((uint64_t)(uint32_t)value << 32) | 0x1;
    *javaValue = (id)(ARCBRIDGE void *)(uintptr_t)tagged;
    return YES;
  }

  // Known descriptor (or the untagged fallback): store the singleton pointer.
  *javaValue = valueDescriptor->enum_;
  return YES;
}

Expand Down
38 changes: 38 additions & 0 deletions protobuf/tests/Proto3EnumTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -101,4 +101,42 @@ public void testNegativeEnumNumber() throws Exception {
Text text = Text.parseFrom(new byte[] {0x08, 0x7f}, ExtensionRegistry.getEmptyRegistry());
assertThat(text.getGreeting()).isSameInstanceAs(Greetings.UNRECOGNIZED);
}

public void testSingularParseUnknownEnumSerialization() throws Exception {
  // Singular field 1 (fruit): tag 0x08 = (1 << 3) | 0, followed by the
  // unrecognized value 5.
  byte[] wire = {0x08, 0x05};
  FruitBox parsed = FruitBox.parseFrom(wire, ExtensionRegistry.getEmptyRegistry());

  // Re-serializing must reproduce the input, including the unrecognized value.
  assertThat(parsed.toByteArray()).isEqualTo(wire);
}

public void testRepeatedParseUnknownEnumSerialization() throws Exception {
  // Packed repeated field 2 (fruits): tag 0x12 = (2 << 3) | 2, payload length 3,
  // then the values 1 (APPLE), 2 (BANANA) and 5 (unrecognized).
  byte[] wire = {0x12, 0x03, 0x01, 0x02, 0x05};
  FruitBox parsed = FruitBox.parseFrom(wire, ExtensionRegistry.getEmptyRegistry());

  // Re-serializing must reproduce the input, including the unrecognized value.
  assertThat(parsed.toByteArray()).isEqualTo(wire);
}

public void testMapParseUnknownEnumSerialization() throws Exception {
  // Map field 3 (fruit_map): tag 0x1a = (3 << 3) | 2, entry length 4.
  // The entry holds key (field 1) = 1 as 0x08 0x01 and value (field 2) = 5,
  // an unrecognized enum number, as 0x10 0x05.
  byte[] wire = {0x1a, 0x04, 0x08, 0x01, 0x10, 0x05};
  FruitBox parsed = FruitBox.parseFrom(wire, ExtensionRegistry.getEmptyRegistry());

  // Re-serializing must reproduce the input, including the unrecognized value.
  assertThat(parsed.toByteArray()).isEqualTo(wire);
}

}
2 changes: 2 additions & 0 deletions protobuf/tests/protos/proto3_enum.proto
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ enum Fruit {

// Message carrying the Fruit enum as a singular field, a repeated field, and a
// map value.
message FruitBox {
// Singular enum field.
Fruit fruit = 1;
// Repeated enum field.
repeated Fruit fruits = 2;
// Map from int32 keys to Fruit enum values.
map<int32, Fruit> fruit_map = 3;
}

enum Greetings {
Expand Down