diff --git a/bindgen-tests/tests/expectations/tests/char16_t.rs b/bindgen-tests/tests/expectations/tests/char16_t.rs new file mode 100644 index 0000000000..82d30fe517 --- /dev/null +++ b/bindgen-tests/tests/expectations/tests/char16_t.rs @@ -0,0 +1,7 @@ +#![allow(dead_code, non_snake_case, non_camel_case_types, non_upper_case_globals)] +#[repr(transparent)] +pub struct bindgen_cchar16_t(u16); +unsafe extern "C" { + #[link_name = "\u{1}_Z16receive_char16_tDs"] + pub fn receive_char16_t(input: bindgen_cchar16_t); +} diff --git a/bindgen-tests/tests/headers/char16_t.hpp b/bindgen-tests/tests/headers/char16_t.hpp new file mode 100644 index 0000000000..35e1f16dd3 --- /dev/null +++ b/bindgen-tests/tests/headers/char16_t.hpp @@ -0,0 +1,4 @@ +// bindgen-flags: --use-distinct-char16-t --raw-line '#[repr(transparent)] pub struct bindgen_cchar16_t(u16);' -- -x c++ -std=c++14 + +void receive_char16_t(char16_t input) { +} diff --git a/bindgen/codegen/helpers.rs b/bindgen/codegen/helpers.rs index 7b09ed7cfb..70f0125931 100644 --- a/bindgen/codegen/helpers.rs +++ b/bindgen/codegen/helpers.rs @@ -187,6 +187,12 @@ pub(crate) mod ast_ty { match ik { IntKind::Bool => syn::parse_quote! { bool }, IntKind::Char { .. } => raw_type(ctx, "c_char"), + // The following is used only when an unusual command-line + // argument is used. bindgen_cchar16_t is not a real type; + // but this allows downstream postprocessors to distinguish + // this case and do something special for C++ bindings + // containing char16_t. + IntKind::Char16 => syn::parse_quote! { bindgen_cchar16_t }, IntKind::SChar => raw_type(ctx, "c_schar"), IntKind::UChar => raw_type(ctx, "c_uchar"), IntKind::Short => raw_type(ctx, "c_short"), diff --git a/bindgen/ir/context.rs b/bindgen/ir/context.rs index 78790d61c4..99c75d63d8 100644 --- a/bindgen/ir/context.rs +++ b/bindgen/ir/context.rs @@ -1980,6 +1980,9 @@ If you encounter an error missing from this list, please file an issue or a PR!" CXType_Short => TypeKind::Int(IntKind::Short), CXType_UShort => TypeKind::Int(IntKind::UShort), CXType_WChar => TypeKind::Int(IntKind::WChar), + CXType_Char16 if self.options().use_distinct_char16_t => { + TypeKind::Int(IntKind::Char16) + } CXType_Char16 => TypeKind::Int(IntKind::U16), CXType_Char32 => TypeKind::Int(IntKind::U32), CXType_Long => TypeKind::Int(IntKind::Long), diff --git a/bindgen/ir/int.rs b/bindgen/ir/int.rs index 4b49931ed8..4caa6b2d06 100644 --- a/bindgen/ir/int.rs +++ b/bindgen/ir/int.rs @@ -54,9 +54,12 @@ pub enum IntKind { /// A 16-bit signed integer. I16, - /// Either a `char16_t` or a `wchar_t`. + /// A 16-bit integer, used only for enum size representation. U16, + /// Either a `char16_t` or a `wchar_t`. + Char16, + /// A 32-bit signed integer. I32, @@ -94,7 +97,7 @@ impl IntKind { // to know whether it is or not right now (unlike char, there's no // WChar_S / WChar_U). Bool | UChar | UShort | UInt | ULong | ULongLong | U8 | U16 | - WChar | U32 | U64 | U128 => false, + Char16 | WChar | U32 | U64 | U128 => false, SChar | Short | Int | Long | LongLong | I8 | I16 | I32 | I64 | I128 => true, @@ -110,7 +113,7 @@ impl IntKind { use self::IntKind::*; Some(match *self { Bool | UChar | SChar | U8 | I8 | Char { .. } => 1, - U16 | I16 => 2, + U16 | I16 | Char16 => 2, U32 | I32 => 4, U64 | I64 => 8, I128 | U128 => 16, diff --git a/bindgen/options/cli.rs b/bindgen/options/cli.rs index 8c4c05bc84..1efddb02f3 100644 --- a/bindgen/options/cli.rs +++ b/bindgen/options/cli.rs @@ -441,6 +441,9 @@ struct BindgenCommand { /// Always output explicit padding fields. #[arg(long)] explicit_padding: bool, + /// Use distinct char16_t + #[arg(long)] + use_distinct_char16_t: bool, /// Enables generation of vtable functions. #[arg(long)] vtable_generation: bool, @@ -629,6 +632,7 @@ where translate_enum_integer_types, c_naming, explicit_padding, + use_distinct_char16_t, vtable_generation, sort_semantically, merge_extern_blocks, @@ -926,6 +930,7 @@ where translate_enum_integer_types, c_naming, explicit_padding, + use_distinct_char16_t, vtable_generation, sort_semantically, merge_extern_blocks, diff --git a/bindgen/options/mod.rs b/bindgen/options/mod.rs index 9d1d195980..1a675401a4 100644 --- a/bindgen/options/mod.rs +++ b/bindgen/options/mod.rs @@ -153,6 +153,28 @@ macro_rules! options { } options! { + /// Whether we should distinguish between 'char16_t' and 'u16'. + /// As standard, bindgen represents `char16_t` as `u16`. + /// Rust does not have a `std::os::raw::c_char16_t` type, and thus + /// we can't use a built-in Rust type in the generated bindings. + /// But for some uses of bindgen, especially when downstream + /// post-processing occurs, it's important to distinguish `char16_t` + /// from normal `uint16_t`. When this option is enabled, bindgen + /// generates a fake type called `bindgen_cchar16_t`. Downstream + /// code post-processors should arrange to replace this with a + /// real type. + use_distinct_char16_t: bool { + methods: { + /// If this is true, denote 'char16_t' as a separate type from 'u16' + /// Disabled by default. + pub fn use_distinct_char16_t(mut self, doit: bool) -> Builder { + self.options.use_distinct_char16_t = doit; + self + } + }, + as_args: "--use-distinct-char16-t", + }, + /// Types that have been blocklisted and should not appear anywhere in the generated code. blocklisted_types: RegexSet { methods: {