From a10bcfd0dd1e23c0139355deb567d83bd4b8a013 Mon Sep 17 00:00:00 2001 From: Adrian Taylor Date: Sun, 16 Feb 2025 16:25:28 +0000 Subject: [PATCH 1/4] Distinguish char16_t. With a new command-line option, this ensures that char16_t is distinct from uint16_t in generated bindings. On some platforms these are distinct types, so it can be important for downstream post processors to spot the difference. See the documentation on the new command-line option for expected behavior and usage here. Part of https://github.com/google/autocxx/issues/124. --- .../tests/expectations/tests/char16_t.rs | 7 ++++++ bindgen-tests/tests/headers/char16_t.hpp | 4 ++++ bindgen/codegen/helpers.rs | 6 +++++ bindgen/ir/context.rs | 3 +++ bindgen/ir/int.rs | 9 +++++--- bindgen/options/cli.rs | 5 +++++ bindgen/options/mod.rs | 22 +++++++++++++++++++ 7 files changed, 53 insertions(+), 3 deletions(-) create mode 100644 bindgen-tests/tests/expectations/tests/char16_t.rs create mode 100644 bindgen-tests/tests/headers/char16_t.hpp diff --git a/bindgen-tests/tests/expectations/tests/char16_t.rs b/bindgen-tests/tests/expectations/tests/char16_t.rs new file mode 100644 index 0000000000..82d30fe517 --- /dev/null +++ b/bindgen-tests/tests/expectations/tests/char16_t.rs @@ -0,0 +1,7 @@ +#![allow(dead_code, non_snake_case, non_camel_case_types, non_upper_case_globals)] +#[repr(transparent)] +pub struct bindgen_cchar16_t(u16); +unsafe extern "C" { + #[link_name = "\u{1}_Z16receive_char16_tDs"] + pub fn receive_char16_t(input: bindgen_cchar16_t); +} diff --git a/bindgen-tests/tests/headers/char16_t.hpp b/bindgen-tests/tests/headers/char16_t.hpp new file mode 100644 index 0000000000..35e1f16dd3 --- /dev/null +++ b/bindgen-tests/tests/headers/char16_t.hpp @@ -0,0 +1,4 @@ +// bindgen-flags: --use-distinct-char16-t --raw-line '#[repr(transparent)] pub struct bindgen_cchar16_t(u16);' -- -x c++ -std=c++14 + +void receive_char16_t(char16_t input) { +} diff --git a/bindgen/codegen/helpers.rs 
b/bindgen/codegen/helpers.rs index 7b09ed7cfb..70f0125931 100644 --- a/bindgen/codegen/helpers.rs +++ b/bindgen/codegen/helpers.rs @@ -187,6 +187,12 @@ pub(crate) mod ast_ty { match ik { IntKind::Bool => syn::parse_quote! { bool }, IntKind::Char { .. } => raw_type(ctx, "c_char"), + // The following is used only when an unusual command-line + // argument is used. bindgen_cchar16_t is not a real type; + // but this allows downstream postprocessors to distinguish + // this case and do something special for C++ bindings + // containing char16_t. + IntKind::Char16 => syn::parse_quote! { bindgen_cchar16_t }, IntKind::SChar => raw_type(ctx, "c_schar"), IntKind::UChar => raw_type(ctx, "c_uchar"), IntKind::Short => raw_type(ctx, "c_short"), diff --git a/bindgen/ir/context.rs b/bindgen/ir/context.rs index 78790d61c4..99c75d63d8 100644 --- a/bindgen/ir/context.rs +++ b/bindgen/ir/context.rs @@ -1980,6 +1980,9 @@ If you encounter an error missing from this list, please file an issue or a PR!" CXType_Short => TypeKind::Int(IntKind::Short), CXType_UShort => TypeKind::Int(IntKind::UShort), CXType_WChar => TypeKind::Int(IntKind::WChar), + CXType_Char16 if self.options().use_distinct_char16_t => { + TypeKind::Int(IntKind::Char16) + } CXType_Char16 => TypeKind::Int(IntKind::U16), CXType_Char32 => TypeKind::Int(IntKind::U32), CXType_Long => TypeKind::Int(IntKind::Long), diff --git a/bindgen/ir/int.rs b/bindgen/ir/int.rs index 4b49931ed8..4caa6b2d06 100644 --- a/bindgen/ir/int.rs +++ b/bindgen/ir/int.rs @@ -54,9 +54,12 @@ pub enum IntKind { /// A 16-bit signed integer. I16, - /// Either a `char16_t` or a `wchar_t`. + /// A 16-bit integer, used only for enum size representation. U16, + /// Either a `char16_t` or a `wchar_t`. + Char16, + /// A 32-bit signed integer. I32, @@ -94,7 +97,7 @@ impl IntKind { // to know whether it is or not right now (unlike char, there's no // WChar_S / WChar_U). 
Bool | UChar | UShort | UInt | ULong | ULongLong | U8 | U16 | - WChar | U32 | U64 | U128 => false, + Char16 | WChar | U32 | U64 | U128 => false, SChar | Short | Int | Long | LongLong | I8 | I16 | I32 | I64 | I128 => true, @@ -110,7 +113,7 @@ impl IntKind { use self::IntKind::*; Some(match *self { Bool | UChar | SChar | U8 | I8 | Char { .. } => 1, - U16 | I16 => 2, + U16 | I16 | Char16 => 2, U32 | I32 => 4, U64 | I64 => 8, I128 | U128 => 16, diff --git a/bindgen/options/cli.rs b/bindgen/options/cli.rs index 8c4c05bc84..1efddb02f3 100644 --- a/bindgen/options/cli.rs +++ b/bindgen/options/cli.rs @@ -441,6 +441,9 @@ struct BindgenCommand { /// Always output explicit padding fields. #[arg(long)] explicit_padding: bool, + /// Use distinct char16_t + #[arg(long)] + use_distinct_char16_t: bool, /// Enables generation of vtable functions. #[arg(long)] vtable_generation: bool, @@ -629,6 +632,7 @@ where translate_enum_integer_types, c_naming, explicit_padding, + use_distinct_char16_t, vtable_generation, sort_semantically, merge_extern_blocks, @@ -926,6 +930,7 @@ where translate_enum_integer_types, c_naming, explicit_padding, + use_distinct_char16_t, vtable_generation, sort_semantically, merge_extern_blocks, diff --git a/bindgen/options/mod.rs b/bindgen/options/mod.rs index 9d1d195980..1a675401a4 100644 --- a/bindgen/options/mod.rs +++ b/bindgen/options/mod.rs @@ -153,6 +153,28 @@ macro_rules! options { } options! { + /// Whether we should distinguish between 'char16_t' and 'u16'. + /// As standard, bindgen represents `char16_t` as `u16`. + /// Rust does not have a `std::os::raw::c_char16_t` type, and thus + /// we can't use a built-in Rust type in the generated bindings. + /// But for some uses of bindgen, especially when downstream + /// post-processing occurs, it's important to distinguish `char16_t` + /// from normal `uint16_t`. When this option is enabled, bindgen + /// generates a fake type called `bindgen_cchar16_t`. 
Downstream + /// code post-processors should arrange to replace this with a + /// real type. + use_distinct_char16_t: bool { + methods: { + /// If this is true, denote 'char16_t' as a separate type from 'u16' + /// Disabled by default. + pub fn use_distinct_char16_t(mut self, doit: bool) -> Builder { + self.options.use_distinct_char16_t = doit; + self + } + }, + as_args: "--use-distinct-char16-t", + }, + /// Types that have been blocklisted and should not appear anywhere in the generated code. blocklisted_types: RegexSet { methods: { From 58805949e12abaa00c0ddc1f6587bbd371a35a9e Mon Sep 17 00:00:00 2001 From: Adrian Taylor Date: Sun, 16 Feb 2025 15:57:46 +0000 Subject: [PATCH 2/4] Report enums in ParseCallbacks. ParseCallbacks previously reported structs but not enums. Enhance it to do so. At the moment, little information is provided about enums - but bindgen doesn't handle (rare) anonymous enums so this seems the right amount of information to report. At the moment, effectively this just provides a mapping between name and DiscoveredItemId. One of a number of PRs I'll be raising for https://github.com/google/autocxx/issues/124. In future PRs I'll be hoping to add further callbacks which report more information based on DiscoveredItemId, so having the DiscoveredItemId for each enum is an important pre-requisite. 
--- .../header_item_discovery.h | 16 +++++++- .../item_discovery_callback/mod.rs | 40 +++++++++++++++++++ bindgen/callbacks.rs | 9 ++++- bindgen/codegen/mod.rs | 9 +++++ 4 files changed, 71 insertions(+), 3 deletions(-) diff --git a/bindgen-tests/tests/parse_callbacks/item_discovery_callback/header_item_discovery.h b/bindgen-tests/tests/parse_callbacks/item_discovery_callback/header_item_discovery.h index 10e97ea480..b2bb04f15f 100644 --- a/bindgen-tests/tests/parse_callbacks/item_discovery_callback/header_item_discovery.h +++ b/bindgen-tests/tests/parse_callbacks/item_discovery_callback/header_item_discovery.h @@ -1,4 +1,4 @@ -// Unions +// Structs void function_using_anonymous_struct(struct {} arg0); struct NamedStruct { @@ -13,4 +13,16 @@ void function_using_anonymous_union(union {} arg0); union NamedUnion { }; -typedef union NamedUnion AliasOfNamedUnion; \ No newline at end of file +typedef union NamedUnion AliasOfNamedUnion; + +// Enums + +// We don't include an anonymous enum because such enums +// are not visible outside the function, and thus tend not +// to be useful - bindgen doesn't handle them for this reason. 
+ +enum NamedEnum { + Fish, +}; + +typedef enum NamedEnum AliasOfNamedEnum; \ No newline at end of file diff --git a/bindgen-tests/tests/parse_callbacks/item_discovery_callback/mod.rs b/bindgen-tests/tests/parse_callbacks/item_discovery_callback/mod.rs index 74af110d00..93a2b029d7 100644 --- a/bindgen-tests/tests/parse_callbacks/item_discovery_callback/mod.rs +++ b/bindgen-tests/tests/parse_callbacks/item_discovery_callback/mod.rs @@ -60,6 +60,19 @@ pub fn test_item_discovery_callback() { alias_for: DiscoveredItemId::new(20), }, ), + ( + DiscoveredItemId::new(27), + DiscoveredItem::Alias { + alias_name: "AliasOfNamedEnum".to_string(), + alias_for: DiscoveredItemId::new(24), + }, + ), + ( + DiscoveredItemId::new(24), + DiscoveredItem::Enum { + final_name: "NamedEnum".to_string(), + }, + ), ( DiscoveredItemId::new(30), DiscoveredItem::Struct { @@ -126,6 +139,9 @@ fn compare_item_info( expected, generated, ), + DiscoveredItem::Enum { .. } => { + compare_enum_info(expected_item, generated_item) + } } } @@ -203,6 +219,30 @@ pub fn compare_union_info( } } +pub fn compare_enum_info( + expected_item: &DiscoveredItem, + generated_item: &DiscoveredItem, +) -> bool { + let DiscoveredItem::Enum { + final_name: expected_final_name, + } = expected_item + else { + unreachable!() + }; + + let DiscoveredItem::Enum { + final_name: generated_final_name, + } = generated_item + else { + unreachable!() + }; + + if !compare_names(expected_final_name, generated_final_name) { + return false; + } + true +} + pub fn compare_alias_info( expected_item: &DiscoveredItem, generated_item: &DiscoveredItem, diff --git a/bindgen/callbacks.rs b/bindgen/callbacks.rs index 8a21e98dea..c2be66828a 100644 --- a/bindgen/callbacks.rs +++ b/bindgen/callbacks.rs @@ -217,7 +217,14 @@ pub enum DiscoveredItem { /// The identifier of the discovered type alias_for: DiscoveredItemId, - }, // functions, modules, etc. + }, + + /// Represents an enum. 
+ Enum { + /// The final name of the generated binding + final_name: String, + }, + // functions, modules, etc. } /// Relevant information about a type to which new derive attributes will be added using diff --git a/bindgen/codegen/mod.rs b/bindgen/codegen/mod.rs index a899ac4de9..f5518e432d 100644 --- a/bindgen/codegen/mod.rs +++ b/bindgen/codegen/mod.rs @@ -3770,6 +3770,15 @@ impl CodeGenerator for Enum { let repr = repr.to_rust_ty_or_opaque(ctx, item); let has_typedef = ctx.is_enum_typedef_combo(item.id()); + ctx.options().for_each_callback(|cb| { + cb.new_item_found( + DiscoveredItemId::new(item.id().as_usize()), + DiscoveredItem::Enum { + final_name: name.to_string(), + }, + ); + }); + let mut builder = EnumBuilder::new(&name, attrs, &repr, variation, has_typedef); From 61603fcd6a738b1a0130eec036eba53993fec9b3 Mon Sep 17 00:00:00 2001 From: Adrian Taylor Date: Sun, 16 Feb 2025 16:11:12 +0000 Subject: [PATCH 3/4] Add extra tests. These files test aspects of bindgen behavior which may not be generally useful to most consumers but are more important to downstream postprocessors such as autocxx. One of them tests enums embedded within classes, and the other tests various types of C++ constructor. Part of https://github.com/google/autocxx/issues/124. 
--- .../expectations/tests/class_with_enum.rs | 14 +++++ .../expectations/tests/special-members.rs | 51 +++++++++++++++++++ .../tests/headers/class_with_enum.hpp | 7 +++ .../tests/headers/special-members.hpp | 7 +++ 4 files changed, 79 insertions(+) create mode 100644 bindgen-tests/tests/expectations/tests/class_with_enum.rs create mode 100644 bindgen-tests/tests/expectations/tests/special-members.rs create mode 100644 bindgen-tests/tests/headers/class_with_enum.hpp create mode 100644 bindgen-tests/tests/headers/special-members.hpp diff --git a/bindgen-tests/tests/expectations/tests/class_with_enum.rs b/bindgen-tests/tests/expectations/tests/class_with_enum.rs new file mode 100644 index 0000000000..ca1806357c --- /dev/null +++ b/bindgen-tests/tests/expectations/tests/class_with_enum.rs @@ -0,0 +1,14 @@ +#![allow(dead_code, non_snake_case, non_camel_case_types, non_upper_case_globals)] +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct A { + pub _address: u8, +} +pub const A_B_B1: A_B = 0; +pub const A_B_B2: A_B = 1; +pub type A_B = ::std::os::raw::c_uint; +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of A"][::std::mem::size_of::<A>() - 1usize]; + ["Alignment of A"][::std::mem::align_of::<A>() - 1usize]; +}; diff --git a/bindgen-tests/tests/expectations/tests/special-members.rs b/bindgen-tests/tests/expectations/tests/special-members.rs new file mode 100644 index 0000000000..4f54670c86 --- /dev/null +++ b/bindgen-tests/tests/expectations/tests/special-members.rs @@ -0,0 +1,51 @@ +#![allow(dead_code, non_snake_case, non_camel_case_types, non_upper_case_globals)] +#[repr(C)] +#[derive(Debug, Default)] +pub struct A { + pub _address: u8, +} +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of A"][::std::mem::size_of::<A>() - 1usize]; + ["Alignment of A"][::std::mem::align_of::<A>() - 1usize]; +}; +unsafe extern "C" { + #[link_name = "\u{1}_ZN1AC1Ev"] + 
pub fn A_A(this: *mut A); +} +unsafe 
extern "C" { + #[link_name = "\u{1}_ZN1AC1ERS_"] + pub fn A_A1(this: *mut A, arg1: *mut A); +} +unsafe extern "C" { + #[link_name = "\u{1}_ZN1AC1EOS_"] + pub fn A_A2(this: *mut A, arg1: *mut A); +} +unsafe extern "C" { + #[link_name = "\u{1}_ZN1AD1Ev"] + pub fn A_A_destructor(this: *mut A); +} +impl A { + #[inline] + pub unsafe fn new() -> Self { + let mut __bindgen_tmp = ::std::mem::MaybeUninit::uninit(); + A_A(__bindgen_tmp.as_mut_ptr()); + __bindgen_tmp.assume_init() + } + #[inline] + pub unsafe fn new1(arg1: *mut A) -> Self { + let mut __bindgen_tmp = ::std::mem::MaybeUninit::uninit(); + A_A1(__bindgen_tmp.as_mut_ptr(), arg1); + __bindgen_tmp.assume_init() + } + #[inline] + pub unsafe fn new2(arg1: *mut A) -> Self { + let mut __bindgen_tmp = ::std::mem::MaybeUninit::uninit(); + A_A2(__bindgen_tmp.as_mut_ptr(), arg1); + __bindgen_tmp.assume_init() + } + #[inline] + pub unsafe fn destruct(&mut self) { + A_A_destructor(self) + } +} diff --git a/bindgen-tests/tests/headers/class_with_enum.hpp b/bindgen-tests/tests/headers/class_with_enum.hpp new file mode 100644 index 0000000000..ebbc2c4049 --- /dev/null +++ b/bindgen-tests/tests/headers/class_with_enum.hpp @@ -0,0 +1,7 @@ +class A { +public: + enum B { + B1, + B2, + }; +}; \ No newline at end of file diff --git a/bindgen-tests/tests/headers/special-members.hpp b/bindgen-tests/tests/headers/special-members.hpp new file mode 100644 index 0000000000..753b2fdc0a --- /dev/null +++ b/bindgen-tests/tests/headers/special-members.hpp @@ -0,0 +1,7 @@ +class A { +public: + A(); + A(A&); + A(A&&); + ~A(); +}; \ No newline at end of file From b3909176baeb7c019e9d80a1513737fa89fd47aa Mon Sep 17 00:00:00 2001 From: Adrian Taylor Date: Wed, 19 Feb 2025 14:10:36 +0000 Subject: [PATCH 4/4] Fix warning. 
--- bindgen/ir/function.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindgen/ir/function.rs b/bindgen/ir/function.rs index 1ce59d199b..ef798fdd69 100644 --- a/bindgen/ir/function.rs +++ b/bindgen/ir/function.rs @@ -935,7 +935,7 @@ impl ClangSubItemParser for Function { Some(SpecialMemberKind::CopyConstructor) } else if cursor.is_move_constructor() { Some(SpecialMemberKind::MoveConstructor) - } else if cursor.kind() == clang_sys::CXCursor_Destructor { + } else if cursor.kind() == CXCursor_Destructor { Some(SpecialMemberKind::Destructor) } else { None