From 72a39cf8590e017838f446b19a4da8bb81d74dce Mon Sep 17 00:00:00 2001 From: Adrian Taylor Date: Sat, 22 Feb 2025 10:24:49 +0000 Subject: [PATCH] Wrap template and opaque types in a marker. As standard, bindgen treats C++ pointers and references the same in its output. Downstream postprocessors might want to treat these things differently; for example to use a CppRef type for C++ references to encode the fact that they're not (usually) null. This PR emits references wrapped in a newtype wrapper called bindgen_marker_Reference This behavior is enabled by an option flag; it isn't default. This type isn't actually defined in bindgen; users are expected to define or replace this during postprocessing (e.g. by using syn to reinterpret bindgen's output, or perhaps there are ways to make this useful using --raw-line or other means to inject a transparent newtype wrapper). The same approach is taken to types which bindgen chooses to make opaque. This is done in various circumstances but the most common case is for non-type template parameters. Alternative designs considered: * Apply an attribute to the function taking or returning such a param, e.g. #[bindgen_marker_takes_reference_arg(1)] fn takes_reference(a: u32, b: *const Foo) This avoids the marker type, but the problem here is a more invasive change to bindgen because type information can no longer be contained in a syn::Type; type metadata needs to be communicated around inside bindgen. * Make a ParseCallbacks call each time a type is opaque or a reference. This would work for standalone opaque types, e.g. pub struct Bar { pub _bindgen_opaque_blob: u8 } but the main case where we need these is where bindgen is using an opaque or reference type in the signature of some function, and there's no real opportunity to describe this in any kind of callback, since the callback would have to describe where exactly in the function signature the opaque or reference type has been used (and then we run into the same problems of passing this metadata around in the innards of bindgen). In order to maintain the current simple design where any C++ type is represented as a simple syn::Type, the best approach seems to be to do this wrapping in a fake marker type. Another design decision here was to represent an RValue reference as: TypeKind::Reference(_, ReferenceKind::RValue) rather than a new variant: TypeKind::RValueReference(_) In the majority of cases references are treated the same way whether they're rvalue or lvalue, so this was less invasive, but either is fine. Part of https://github.com/google/autocxx/issues/124 --- .../tests/opaque_newtype_wrapper.rs | 17 ++++++++ .../tests/reference_newtype_wrapper.rs | 23 ++++++++++ .../tests/headers/opaque_newtype_wrapper.hpp | 14 ++++++ .../headers/reference_newtype_wrapper.hpp | 10 +++++ bindgen/codegen/helpers.rs | 34 +++++++++++++++ bindgen/codegen/mod.rs | 28 ++++++++++-- bindgen/ir/analysis/has_vtable.rs | 2 +- bindgen/ir/ty.rs | 33 +++++++++++--- bindgen/options/cli.rs | 10 +++++ bindgen/options/mod.rs | 43 +++++++++++++++++++ 10 files changed, 204 insertions(+), 10 deletions(-) create mode 100644 bindgen-tests/tests/expectations/tests/opaque_newtype_wrapper.rs create mode 100644 bindgen-tests/tests/expectations/tests/reference_newtype_wrapper.rs create mode 100644 bindgen-tests/tests/headers/opaque_newtype_wrapper.hpp create mode 100644 bindgen-tests/tests/headers/reference_newtype_wrapper.hpp diff --git a/bindgen-tests/tests/expectations/tests/opaque_newtype_wrapper.rs b/bindgen-tests/tests/expectations/tests/opaque_newtype_wrapper.rs new file mode 100644 index 0000000000..5193f0588c --- /dev/null +++ b/bindgen-tests/tests/expectations/tests/opaque_newtype_wrapper.rs @@ -0,0 +1,17 @@ +#![allow(dead_code, non_snake_case, non_camel_case_types, non_upper_case_globals)] +#[repr(transparent)] +pub struct __bindgen_marker_Opaque(T); +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct Bar { + pub _address: u8, +} +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of Bar"][::std::mem::size_of::() - 1usize]; + ["Alignment of Bar"][::std::mem::align_of::() - 1usize]; +}; +unsafe extern "C" { + #[link_name = "\u{1}_Z8take_foo3FooILi3EE"] + pub fn take_foo(foo: __bindgen_marker_Opaque); +} diff --git a/bindgen-tests/tests/expectations/tests/reference_newtype_wrapper.rs b/bindgen-tests/tests/expectations/tests/reference_newtype_wrapper.rs new file mode 100644 index 0000000000..22ca64ba76 --- /dev/null +++ b/bindgen-tests/tests/expectations/tests/reference_newtype_wrapper.rs @@ -0,0 +1,23 @@ +#![allow(dead_code, non_snake_case, non_camel_case_types, non_upper_case_globals)] +#[repr(transparent)] +pub struct __bindgen_marker_Reference(*const T); +#[repr(transparent)] +pub struct __bindgen_marker_RValueReference(*const T); +unsafe extern "C" { + #[link_name = "\u{1}_Z17receive_referenceRKi"] + pub fn receive_reference( + input: __bindgen_marker_Reference<*const ::std::os::raw::c_int>, + ) -> __bindgen_marker_Reference<*const ::std::os::raw::c_int>; +} +unsafe extern "C" { + #[link_name = "\u{1}_Z21receive_mut_referenceRi"] + pub fn receive_mut_reference( + input: __bindgen_marker_Reference<*mut ::std::os::raw::c_int>, + ) -> __bindgen_marker_Reference<*mut ::std::os::raw::c_int>; +} +unsafe extern "C" { + #[link_name = "\u{1}_Z24receive_rvalue_referenceOi"] + pub fn receive_rvalue_reference( + input: __bindgen_marker_RValueReference<*mut ::std::os::raw::c_int>, + ); +} diff --git a/bindgen-tests/tests/headers/opaque_newtype_wrapper.hpp b/bindgen-tests/tests/headers/opaque_newtype_wrapper.hpp new file mode 100644 index 0000000000..f16af41eed --- /dev/null +++ b/bindgen-tests/tests/headers/opaque_newtype_wrapper.hpp @@ -0,0 +1,14 @@ +// bindgen-flags: --use-opaque-newtype-wrapper --raw-line '#[repr(transparent)] pub struct __bindgen_marker_Opaque(T);' -- -x c++ -std=c++14 + +class Bar {}; + +template +class Foo { +public: + int a[N]; +}; + +// Non-type template parameters are one of the cases where bindgen is +// forced to generate an opaque type. Ensure we spot that and annotate +// it. +void take_foo(Foo<3> foo); diff --git a/bindgen-tests/tests/headers/reference_newtype_wrapper.hpp b/bindgen-tests/tests/headers/reference_newtype_wrapper.hpp new file mode 100644 index 0000000000..7cb19bcb7d --- /dev/null +++ b/bindgen-tests/tests/headers/reference_newtype_wrapper.hpp @@ -0,0 +1,10 @@ +// bindgen-flags: --use-reference-newtype-wrapper --raw-line '#[repr(transparent)] pub struct __bindgen_marker_Reference(*const T); #[repr(transparent)] pub struct __bindgen_marker_RValueReference(*const T);' -- -x c++ -std=c++14 + +const int& receive_reference(const int& input) { + return input; +} +int& receive_mut_reference(int& input) { + return input; +} +void receive_rvalue_reference(int&& input) { +} diff --git a/bindgen/codegen/helpers.rs b/bindgen/codegen/helpers.rs index 82172f3488..d65854b42c 100644 --- a/bindgen/codegen/helpers.rs +++ b/bindgen/codegen/helpers.rs @@ -4,6 +4,7 @@ use proc_macro2::{Ident, Span}; use crate::ir::context::BindgenContext; use crate::ir::layout::Layout; +use crate::ir::ty::ReferenceKind; pub(crate) mod attributes { use proc_macro2::{Ident, Span, TokenStream}; @@ -84,6 +85,19 @@ pub(crate) fn blob( ctx: &BindgenContext, layout: Layout, ffi_safe: bool, +) -> syn::Type { + let inner_blob = blob_inner(ctx, layout, ffi_safe); + if ctx.options().use_opaque_newtype_wrapper { + syn::parse_quote! { __bindgen_marker_Opaque < #inner_blob > } + } else { + inner_blob + } +} + +pub(crate) fn blob_inner( + ctx: &BindgenContext, + layout: Layout, + ffi_safe: bool, ) -> syn::Type { let opaque = layout.opaque(); @@ -393,3 +407,23 @@ pub(crate) mod ast_ty { .collect() } } + +pub(crate) fn reference( + ty_ptr: syn::TypePtr, + kind: ReferenceKind, + ctx: &BindgenContext, +) -> syn::Type { + let ptr = syn::Type::Ptr(ty_ptr); + if ctx.options().use_reference_newtype_wrapper { + match kind { + ReferenceKind::LValue => { + syn::parse_quote! { __bindgen_marker_Reference < #ptr >} + } + ReferenceKind::RValue => { + syn::parse_quote! { __bindgen_marker_RValueReference < #ptr >} + } + } + } else { + ptr + } +} diff --git a/bindgen/codegen/mod.rs b/bindgen/codegen/mod.rs index f5518e432d..fdb8f5bc99 100644 --- a/bindgen/codegen/mod.rs +++ b/bindgen/codegen/mod.rs @@ -2292,7 +2292,13 @@ impl CodeGenerator for CompInfo { if has_address { let layout = Layout::new(1, 1); - let ty = helpers::blob(ctx, Layout::new(1, 1), false); + // Normally for opaque data we use helpers::blob, + // which may wrap it in __bindgen_marker_Opaque. + // Downstream tools may then use this as a sign that + // the type is complex or can't be stack-allocated, + // etc. But in this case this one-byte field is harmless + // so we'll just represent it without that extra marker. + let ty = helpers::blob_inner(ctx, Layout::new(1, 1), false); struct_layout.saw_field_with_layout( "_address", layout, @@ -4358,7 +4364,7 @@ impl TryToRustTy for Type { utils::build_path(item, ctx) } TypeKind::Opaque => self.try_to_opaque(ctx, item), - TypeKind::Pointer(inner) | TypeKind::Reference(inner) => { + TypeKind::Pointer(inner) | TypeKind::Reference(inner, _) => { // Check that this type has the same size as the target's pointer type. let size = self.get_layout(ctx, item).size; if size != ctx.target_pointer_size() { @@ -4391,7 +4397,23 @@ impl TryToRustTy for Type { { Ok(ty) } else { - Ok(ty.to_ptr(is_const)) + let ty_ptr = ty.to_ptr(is_const); + Ok(if let syn::Type::Ptr(ty_ptr_inside) = &ty_ptr { + // It always should be Type::Ptr, but just in case + if let TypeKind::Reference(_, reference_kind) = + self.kind() + { + helpers::reference( + ty_ptr_inside.clone(), + *reference_kind, + ctx, + ) + } else { + ty_ptr + } + } else { + ty_ptr + }) } } TypeKind::TypeParam => { diff --git a/bindgen/ir/analysis/has_vtable.rs b/bindgen/ir/analysis/has_vtable.rs index 3ff64a6d2b..e375be3015 100644 --- a/bindgen/ir/analysis/has_vtable.rs +++ b/bindgen/ir/analysis/has_vtable.rs @@ -159,7 +159,7 @@ impl<'ctx> MonotoneFramework for HasVtableAnalysis<'ctx> { TypeKind::TemplateAlias(t, _) | TypeKind::Alias(t) | TypeKind::ResolvedTypeRef(t) | - TypeKind::Reference(t) => { + TypeKind::Reference(t, _) => { trace!( " aliases and references forward to their inner type" ); diff --git a/bindgen/ir/ty.rs b/bindgen/ir/ty.rs index 2589a56fca..4323695609 100644 --- a/bindgen/ir/ty.rs +++ b/bindgen/ir/ty.rs @@ -259,7 +259,9 @@ impl Type { ) -> Option> { let name_info = match *self.kind() { TypeKind::Pointer(inner) => Some((inner, Cow::Borrowed("ptr"))), - TypeKind::Reference(inner) => Some((inner, Cow::Borrowed("ref"))), + TypeKind::Reference(inner, _) => { + Some((inner, Cow::Borrowed("ref"))) + } TypeKind::Array(inner, length) => { Some((inner, format!("array{length}").into())) } @@ -544,7 +546,7 @@ impl TemplateParameters for TypeKind { TypeKind::Enum(_) | TypeKind::Pointer(_) | TypeKind::BlockPointer(_) | - TypeKind::Reference(_) | + TypeKind::Reference(..) | TypeKind::UnresolvedTypeRef(..) | TypeKind::TypeParam | TypeKind::Alias(_) | @@ -570,6 +572,15 @@ pub(crate) enum FloatKind { Float128, } +/// Different kinds of C++ reference +#[derive(Debug, Clone, Copy)] +pub(crate) enum ReferenceKind { + /// C++ lvalue reference + LValue, + /// C++ rvalue reference + RValue, +} + /// The different kinds of types that we can parse. #[derive(Debug)] pub(crate) enum TypeKind { @@ -624,7 +635,8 @@ pub(crate) enum TypeKind { BlockPointer(TypeId), /// A reference to a type, as in: int& `foo()`. - Reference(TypeId), + /// The bool represents whether it's rvalue. + Reference(TypeId, ReferenceKind), /// An instantiation of an abstract template definition with a set of /// concrete template arguments. @@ -1021,14 +1033,23 @@ impl Type { } // XXX: RValueReference is most likely wrong, but I don't think we // can even add bindings for that, so huh. - CXType_RValueReference | CXType_LValueReference => { + CXType_LValueReference => { + let inner = Item::from_ty_or_ref( + ty.pointee_type().unwrap(), + location, + None, + ctx, + ); + TypeKind::Reference(inner, ReferenceKind::LValue) + } + CXType_RValueReference => { let inner = Item::from_ty_or_ref( ty.pointee_type().unwrap(), location, None, ctx, ); - TypeKind::Reference(inner) + TypeKind::Reference(inner, ReferenceKind::RValue) } // XXX DependentSizedArray is wrong CXType_VariableArray | CXType_DependentSizedArray => { @@ -1205,7 +1226,7 @@ impl Trace for Type { } match *self.kind() { TypeKind::Pointer(inner) | - TypeKind::Reference(inner) | + TypeKind::Reference(inner, _) | TypeKind::Array(inner, _) | TypeKind::Vector(inner, _) | TypeKind::BlockPointer(inner) | diff --git a/bindgen/options/cli.rs b/bindgen/options/cli.rs index f9a8572976..cc9bc103a6 100644 --- a/bindgen/options/cli.rs +++ b/bindgen/options/cli.rs @@ -444,6 +444,12 @@ struct BindgenCommand { /// Always be specific about the 'receiver' of a virtual function. #[arg(long)] use_specific_virtual_function_receiver: bool, + /// Use a newtype wrapper around opaque types. + #[arg(long)] + use_opaque_newtype_wrapper: bool, + /// Use a newtype wrapper around C++ references. + #[arg(long)] + use_reference_newtype_wrapper: bool, /// Use distinct char16_t #[arg(long)] use_distinct_char16_t: bool, @@ -636,6 +642,8 @@ where c_naming, explicit_padding, use_specific_virtual_function_receiver, + use_opaque_newtype_wrapper, + use_reference_newtype_wrapper, use_distinct_char16_t, vtable_generation, sort_semantically, @@ -935,6 +943,8 @@ where c_naming, explicit_padding, use_specific_virtual_function_receiver, + use_opaque_newtype_wrapper, + use_reference_newtype_wrapper, use_distinct_char16_t, vtable_generation, sort_semantically, diff --git a/bindgen/options/mod.rs b/bindgen/options/mod.rs index ab6b232ec3..f524c73c29 100644 --- a/bindgen/options/mod.rs +++ b/bindgen/options/mod.rs @@ -168,6 +168,49 @@ options! { as_args: "--use-specific-virtual-function-receiver", }, + /// Use a newtype wrapper to clearly denote "opaque" types; that is, + /// types where bindgen has generated a type matching the size and + /// alignment of the C++ type but without any knowledge of what's + /// inside it. The newtype wrapper will be a fake type called + /// `bindgen_marker_Opaque`. It's assumed that you will replace this with some + /// real sensible newtype wrapper of your own, either by post-processing + /// the output of bindgen, or by using a `use` statemet injected using + /// `--module-raw-lines` or similar. + use_opaque_newtype_wrapper: bool { + methods: { + /// If this is true, wrap opaque types in a fake newtype + /// wrapper which post-processors can replace with something + /// more sensible. + pub fn use_opaque_newtype_wrapper(mut self, doit: bool) -> Builder { + self.options.use_opaque_newtype_wrapper = doit; + self + } + }, + as_args: "--use-opaque-newtype-wrapper", + }, + + /// Use a newtype wrapper to clearly denote C++ reference types. + /// These are always generated as raw Rust pointers, and so otherwise + /// there's no way to distinguish references from pointers. + /// The newtype wrapper will be a fake type either called + /// `bindgen_marker_Reference` or `bindgen_marker_RVAlueReference`. + /// It's assumed that you will replace this with some + /// real sensible newtype wrapper of your own, either by post-processing + /// the output of bindgen, or by using a `use` statemet injected using + /// `--module-raw-lines` or similar. + use_reference_newtype_wrapper: bool { + methods: { + /// If this is true, wrap C++ references in a fake newtype + /// wrapper which post-processors can replace with something + /// more sensible. + pub fn use_reference_newtype_wrapper(mut self, doit: bool) -> Builder { + self.options.use_reference_newtype_wrapper = doit; + self + } + }, + as_args: "--use-reference-newtype-wrapper", + }, + /// Whether we should distinguish between C++'s 'char16_t' and 'u16'. /// The C++ type `char16_t` is its own special type; it's not a typedef /// of some other integer (this differs from C).