diff --git a/CHANGELOG.md b/CHANGELOG.md index 1671f13af0..10be4ac40a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -210,6 +210,8 @@ ## Changed - Remove which and lazy-static dependencies (#2809, #2817). - Generate compile-time layout tests (#2787). +- `ParseCallbacks::int_macro` now takes an `i128` instead of an `i64`. +- `ParseCallbacks::func_macro` was renamed to `ParseCallbacks::fn_macro` and now takes a single `FnMacroInfo` argument. ## Removed ## Fixed - Fix `--formatter=prettyplease` not working in `bindgen-cli` by adding `prettyplease` feature and diff --git a/bindgen-integration/build.rs b/bindgen-integration/build.rs index 6b06c91bc3..7e9b37e47c 100644 --- a/bindgen-integration/build.rs +++ b/bindgen-integration/build.rs @@ -1,7 +1,7 @@ extern crate bindgen; use bindgen::callbacks::{ - DeriveInfo, IntKind, MacroParsingBehavior, ParseCallbacks, + DeriveInfo, FnMacroInfo, IntKind, MacroParsingBehavior, ParseCallbacks, }; use bindgen::{Builder, EnumVariation, Formatter}; use std::collections::HashSet; @@ -27,7 +27,7 @@ impl ParseCallbacks for MacroCallback { MacroParsingBehavior::Default } - fn int_macro(&self, name: &str, _value: i64) -> Option { + fn int_macro(&self, name: &str, _value: i128) -> Option { match name { "TESTMACRO_CUSTOMINTKIND_PATH" => Some(IntKind::Custom { name: "crate::MacroInteger", @@ -59,43 +59,45 @@ impl ParseCallbacks for MacroCallback { } } - fn func_macro(&self, name: &str, value: &[&[u8]]) { - match name { + fn fn_macro(&self, info: &FnMacroInfo<'_>) { + let args = info.args(); + let body = info.body(); + + match info.name() { "TESTMACRO_NONFUNCTIONAL" => { - panic!("func_macro was called for a non-functional macro"); + panic!("fn_macro was called for a non-functional macro"); } - "TESTMACRO_FUNCTIONAL_NONEMPTY(TESTMACRO_INTEGER)" => { + "TESTMACRO_FUNCTIONAL_NONEMPTY" => { // Spaces are inserted into the right-hand side of a functional // macro during reconstruction from the tokenization. This might // change in the future, but it is safe by the definition of a // token in C, whereas leaving the spaces out could change // tokenization. - assert_eq!(value, &[b"-" as &[u8], b"TESTMACRO_INTEGER"]); + assert_eq!(args, &["TESTMACRO_INTEGER"]); + assert_eq!(body, &["-", "TESTMACRO_INTEGER"]); *self.seen_funcs.lock().unwrap() += 1; } - "TESTMACRO_FUNCTIONAL_EMPTY(TESTMACRO_INTEGER)" => { - assert_eq!(value, &[] as &[&[u8]]); + "TESTMACRO_FUNCTIONAL_EMPTY" => { + assert_eq!(args, &["TESTMACRO_INTEGER"]); + assert_eq!(body, &[] as &[&str]); *self.seen_funcs.lock().unwrap() += 1; } - "TESTMACRO_FUNCTIONAL_TOKENIZED(a,b,c,d,e)" => { - assert_eq!( - value, - &[b"a" as &[u8], b"/", b"b", b"c", b"d", b"##", b"e"] - ); + "TESTMACRO_FUNCTIONAL_TOKENIZED" => { + assert_eq!(args, &["a", "b", "c", "d", "e"]); + assert_eq!(body, &["a", "/", "b", "c", "d", "##", "e"]); *self.seen_funcs.lock().unwrap() += 1; } - "TESTMACRO_FUNCTIONAL_SPLIT(a,b)" => { - assert_eq!(value, &[b"b", b",", b"a"]); + "TESTMACRO_FUNCTIONAL_SPLIT" => { + assert_eq!(args, &["a", "b"]); + assert_eq!(body, &["b", ",", "a"]); *self.seen_funcs.lock().unwrap() += 1; } - "TESTMACRO_STRING_FUNC_NON_UTF8(x)" => { - assert_eq!( - value, - &[b"(" as &[u8], b"x", b"\"\xff\xff\"", b")"] - ); + "TESTMACRO_STRING_FUNC_NON_UTF8" => { + assert_eq!(args, &["x"]); + assert_eq!(body, &["(", "x", r#""\xFF\xFF""#, ")"]); *self.seen_funcs.lock().unwrap() += 1; } - _ => { + name => { // The system might provide lots of functional macros. // Ensure we did not miss handling one that we meant to handle. assert!(!name.starts_with("TESTMACRO_"), "name = {}", name); @@ -145,7 +147,7 @@ impl Drop for MacroCallback { assert_eq!( *self.seen_funcs.lock().unwrap(), 5, - "func_macro handle was not called once for all relevant macros" + "fn_macro handle was not called once for all relevant macros" ); } } diff --git a/bindgen-integration/cpp/Test.h b/bindgen-integration/cpp/Test.h index 81a921b5f8..b67ea80c93 100644 --- a/bindgen-integration/cpp/Test.h +++ b/bindgen-integration/cpp/Test.h @@ -20,7 +20,7 @@ a //#define TESTMACRO_INVALID("string") // A conforming preprocessor rejects this #define TESTMACRO_STRING_EXPR ("string") -#define TESTMACRO_STRING_FUNC_NON_UTF8(x) (x "��") /* invalid UTF-8 on purpose */ +#define TESTMACRO_STRING_FUNC_NON_UTF8(x) (x "\xFF\xFF") /* invalid UTF-8 on purpose */ enum { MY_ANNOYING_MACRO = diff --git a/bindgen/callbacks.rs b/bindgen/callbacks.rs index 0f16c4c0bf..494ad816b2 100644 --- a/bindgen/callbacks.rs +++ b/bindgen/callbacks.rs @@ -27,82 +27,90 @@ pub trait ParseCallbacks: fmt::Debug { } /// This function will be run on every macro that is identified. - fn will_parse_macro(&self, _name: &str) -> MacroParsingBehavior { + #[allow(unused_variables)] + fn will_parse_macro(&self, name: &str) -> MacroParsingBehavior { MacroParsingBehavior::Default } /// This function will run for every extern variable and function. The returned value determines /// the name visible in the bindings. + #[allow(unused_variables)] fn generated_name_override( &self, - _item_info: ItemInfo<'_>, + item_info: ItemInfo<'_>, ) -> Option { None } /// This function will run for every extern variable and function. The returned value determines /// the link name in the bindings. + #[allow(unused_variables)] fn generated_link_name_override( &self, - _item_info: ItemInfo<'_>, + item_info: ItemInfo<'_>, ) -> Option { None } /// The integer kind an integer macro should have, given a name and the /// value of that macro, or `None` if you want the default to be chosen. - fn int_macro(&self, _name: &str, _value: i64) -> Option { + #[allow(unused_variables)] + fn int_macro(&self, name: &str, value: i128) -> Option { None } /// This will be run on every string macro. The callback cannot influence the further /// treatment of the macro, but may use the value to generate additional code or configuration. - fn str_macro(&self, _name: &str, _value: &[u8]) {} + #[allow(unused_variables)] + fn str_macro(&self, name: &str, value: &[u8]) {} /// This will be run on every function-like macro. The callback cannot /// influence the further treatment of the macro, but may use the value to /// generate additional code or configuration. - /// - /// The first parameter represents the name and argument list (including the - /// parentheses) of the function-like macro. The second parameter represents - /// the expansion of the macro as a sequence of tokens. - fn func_macro(&self, _name: &str, _value: &[&[u8]]) {} + #[allow(unused_variables)] + fn fn_macro(&self, info: &FnMacroInfo<'_>) {} /// This function should return whether, given an enum variant /// name, and value, this enum variant will forcibly be a constant. + #[allow(unused_variables)] fn enum_variant_behavior( &self, - _enum_name: Option<&str>, - _original_variant_name: &str, - _variant_value: EnumVariantValue, + enum_name: Option<&str>, + original_variant_name: &str, + variant_value: EnumVariantValue, ) -> Option { None } /// Allows to rename an enum variant, replacing `_original_variant_name`. + #[allow(unused_variables)] fn enum_variant_name( &self, - _enum_name: Option<&str>, - _original_variant_name: &str, - _variant_value: EnumVariantValue, + enum_name: Option<&str>, + original_variant_name: &str, + variant_value: EnumVariantValue, ) -> Option { None } /// Allows to rename an item, replacing `_original_item_name`. - fn item_name(&self, _original_item_name: &str) -> Option { + #[allow(unused_variables)] + fn item_name(&self, original_item_name: &str) -> Option { None } /// This will be called on every header filename passed to (`Builder::header`)[`crate::Builder::header`]. - fn header_file(&self, _filename: &str) {} + #[allow(unused_variables)] + fn header_file(&self, filename: &str) {} /// This will be called on every file inclusion, with the full path of the included file. - fn include_file(&self, _filename: &str) {} + #[allow(unused_variables)] + fn include_file(&self, filename: &str) {} /// This will be called every time `bindgen` reads an environment variable whether it has any /// content or not. - fn read_env_var(&self, _key: &str) {} + #[allow(unused_variables)] + fn read_env_var(&self, key: &str) {} /// This will be called to determine whether a particular blocklisted type /// implements a trait or not. This will be used to implement traits on @@ -113,10 +121,11 @@ pub trait ParseCallbacks: fmt::Debug { /// * `Some(ImplementsTrait::Manually)`: any type including `_name` can't /// derive `_derive_trait` but can implemented it manually /// * `Some(ImplementsTrait::No)`: `_name` doesn't implement `_derive_trait` + #[allow(unused_variables)] fn blocklisted_type_implements_trait( &self, - _name: &str, - _derive_trait: DeriveTrait, + name: &str, + derive_trait: DeriveTrait, ) -> Option { None } @@ -125,12 +134,14 @@ pub trait ParseCallbacks: fmt::Debug { /// /// If no additional attributes are wanted, this function should return an /// empty `Vec`. - fn add_derives(&self, _info: &DeriveInfo<'_>) -> Vec { + #[allow(unused_variables)] + fn add_derives(&self, info: &DeriveInfo<'_>) -> Vec { vec![] } /// Process a source code comment. - fn process_comment(&self, _comment: &str) -> Option { + #[allow(unused_variables)] + fn process_comment(&self, comment: &str) -> Option { None } @@ -138,9 +149,10 @@ pub trait ParseCallbacks: fmt::Debug { /// /// Caution: This allows overriding standard C++ visibility inferred by /// `respect_cxx_access_specs`. + #[allow(unused_variables)] fn field_visibility( &self, - _info: FieldInfo<'_>, + info: FieldInfo<'_>, ) -> Option { None } @@ -151,7 +163,8 @@ pub trait ParseCallbacks: fmt::Debug { /// /// The returned string is new function name. #[cfg(feature = "experimental")] - fn wrap_as_variadic_fn(&self, _name: &str) -> Option { + #[allow(unused_variables)] + fn wrap_as_variadic_fn(&self, name: &str) -> Option { None } } @@ -206,3 +219,27 @@ pub struct FieldInfo<'a> { /// The name of the field. pub field_name: &'a str, } + +/// A struct providing information about the function-like macro being passed to [`ParseCallbacks::fn_macro`]. +pub struct FnMacroInfo<'m> { + pub(crate) name: &'m str, + pub(crate) args: &'m [&'m str], + pub(crate) body: &'m [&'m str], +} + +impl FnMacroInfo<'_> { + /// The macro name. + pub fn name(&self) -> &str { + self.name + } + + /// The macro argument names. + pub fn args(&self) -> &[&str] { + self.args + } + + /// The macro body as delimited `clang` tokens. + pub fn body(&self) -> &[&str] { + self.body + } +} diff --git a/bindgen/clang.rs b/bindgen/clang.rs index 26c02acec9..9267b45826 100644 --- a/bindgen/clang.rs +++ b/bindgen/clang.rs @@ -792,7 +792,7 @@ impl Cursor { (kind == CXCursor_UnexposedAttr && cur.tokens().iter().any(|t| { t.kind == attr.token_kind && - t.spelling() == attr.name + t.spelling().to_bytes() == attr.name })) { *found_attr = true; @@ -1040,12 +1040,9 @@ pub(crate) struct ClangToken { } impl ClangToken { - /// Get the token spelling, without being converted to utf-8. - pub(crate) fn spelling(&self) -> &[u8] { - let c_str = unsafe { - CStr::from_ptr(clang_getCString(self.spelling) as *const _) - }; - c_str.to_bytes() + /// Returns the token spelling. + pub(crate) fn spelling(&self) -> &CStr { + unsafe { CStr::from_ptr(clang_getCString(self.spelling) as *const _) } } /// Converts a ClangToken to a `cexpr` token if possible. @@ -1068,7 +1065,7 @@ impl ClangToken { Some(token::Token { kind, - raw: self.spelling().to_vec().into_boxed_slice(), + raw: self.spelling().to_bytes().to_vec().into_boxed_slice(), }) } } diff --git a/bindgen/ir/context.rs b/bindgen/ir/context.rs index a1536935b6..85380c3c87 100644 --- a/bindgen/ir/context.rs +++ b/bindgen/ir/context.rs @@ -2273,8 +2273,20 @@ If you encounter an error missing from this list, please file an issue or a PR!" let mut kind = ModuleKind::Normal; let mut looking_for_name = false; for token in cursor.tokens().iter() { - match token.spelling() { - b"inline" => { + let spelling = token.spelling(); + let name = match spelling.to_str() { + Ok(name) => Cow::Borrowed(name), + Err(_) => { + let name = spelling.to_string_lossy(); + warn!( + "Lossy conversion of non-UTF8 token {:?} to {:?}.", + spelling, name + ); + name + } + }; + match name.as_ref() { + "inline" => { debug_assert!( kind != ModuleKind::Inline, "Multiple inline keywords?" @@ -2291,39 +2303,34 @@ If you encounter an error missing from this list, please file an issue or a PR!" // but the tokenization of the second begins with the double // colon. That's ok, so we only need to handle the weird // tokenization here. - b"namespace" | b"::" => { + "namespace" | "::" => { looking_for_name = true; } - b"{" => { + "{" => { // This should be an anonymous namespace. assert!(looking_for_name); break; } - name => { - if looking_for_name { - if module_name.is_none() { - module_name = Some( - String::from_utf8_lossy(name).into_owned(), - ); - } - break; - } else { - // This is _likely_, but not certainly, a macro that's - // been placed just before the namespace keyword. - // Unfortunately, clang tokens don't let us easily see - // through the ifdef tokens, so we don't know what this - // token should really be. Instead of panicking though, - // we warn the user that we assumed the token was blank, - // and then move on. - // - // See also https://github.com/rust-lang/rust-bindgen/issues/1676. - warn!( - "Ignored unknown namespace prefix '{}' at {:?} in {:?}", - String::from_utf8_lossy(name), - token, - cursor - ); + name if looking_for_name => { + if module_name.is_none() { + module_name = Some(name.to_owned()); } + break; + } + name => { + // This is _likely_, but not certainly, a macro that's + // been placed just before the namespace keyword. + // Unfortunately, clang tokens don't let us easily see + // through the ifdef tokens, so we don't know what this + // token should really be. Instead of panicking though, + // we warn the user that we assumed the token was blank, + // and then move on. + // + // See also https://github.com/rust-lang/rust-bindgen/issues/1676. + warn!( + "Ignored unknown namespace prefix '{}' at {:?} in {:?}", + name, token, cursor + ); } } } diff --git a/bindgen/ir/var.rs b/bindgen/ir/var.rs index 9d46135f74..1b20c2ab60 100644 --- a/bindgen/ir/var.rs +++ b/bindgen/ir/var.rs @@ -7,7 +7,7 @@ use super::function::cursor_mangling; use super::int::IntKind; use super::item::Item; use super::ty::{FloatKind, TypeKind}; -use crate::callbacks::{ItemInfo, ItemKind, MacroParsingBehavior}; +use crate::callbacks::{FnMacroInfo, ItemInfo, ItemKind, MacroParsingBehavior}; use crate::clang; use crate::clang::ClangToken; use crate::parse::{ClangSubItemParser, ParseError, ParseResult}; @@ -160,18 +160,43 @@ fn handle_function_macro( ) { let is_closing_paren = |t: &ClangToken| { // Test cheap token kind before comparing exact spellings. - t.kind == clang_sys::CXToken_Punctuation && t.spelling() == b")" + t.kind == clang_sys::CXToken_Punctuation && + t.spelling().to_bytes() == b")" }; - let tokens: Vec<_> = cursor.tokens().iter().collect(); - if let Some(boundary) = tokens.iter().position(is_closing_paren) { - let mut spelled = tokens.iter().map(ClangToken::spelling); - // Add 1, to convert index to length. - let left = spelled.by_ref().take(boundary + 1); - let left = left.collect::>().concat(); - if let Ok(left) = String::from_utf8(left) { - let right: Vec<_> = spelled.collect(); - callbacks.func_macro(&left, &right); - } + let mut raw_tokens: Vec<_> = cursor.tokens().iter().collect(); + if let Some(boundary) = raw_tokens.iter().position(is_closing_paren) { + let tokens: Result, _> = raw_tokens + .iter() + .map(|token| token.spelling().to_str()) + .collect(); + + let mut tokens = if let Ok(tokens) = tokens { + tokens + } else { + let raw_name = raw_tokens.remove(0); + warn!( + "Ignoring macro {:?} containing invalid UTF-8 tokens.", + raw_name.spelling() + ); + return; + }; + + let name = tokens.remove(0); + let args: Vec<_> = tokens + .drain(..boundary) + .skip(1) + .take(boundary - 2) + .filter(|&token| token != ",") + .collect(); + let body = tokens; + + let info = FnMacroInfo { + name, + args: &args, + body: &body, + }; + + callbacks.fn_macro(&info); } } @@ -260,7 +285,7 @@ impl ClangSubItemParser for Var { EvalResult::Int(Wrapping(value)) => { let kind = ctx .options() - .last_callback(|c| c.int_macro(&name, value)) + .last_callback(|c| c.int_macro(&name, value.into())) .unwrap_or_else(|| { default_macro_constant_type(ctx, value) });