Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions BUCK
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ rust_binary(
"//third-party:proc-macro2",
"//third-party:quote",
"//third-party:syn",
"//third-party:unicode-ident",
],
)

Expand Down Expand Up @@ -76,6 +77,7 @@ rust_library(
"//third-party:quote",
"//third-party:rustversion",
"//third-party:syn",
"//third-party:unicode-ident",
],
)

Expand All @@ -101,6 +103,7 @@ rust_library(
"//third-party:quote",
"//third-party:scratch",
"//third-party:syn",
"//third-party:unicode-ident",
],
)

Expand All @@ -125,5 +128,6 @@ rust_library(
"//third-party:proc-macro2",
"//third-party:quote",
"//third-party:syn",
"//third-party:unicode-ident",
],
)
4 changes: 4 additions & 0 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ rust_binary(
"@crates.io//:proc-macro2",
"@crates.io//:quote",
"@crates.io//:syn",
"@crates.io//:unicode-ident",
],
)

Expand Down Expand Up @@ -70,6 +71,7 @@ rust_proc_macro(
"@crates.io//:proc-macro2",
"@crates.io//:quote",
"@crates.io//:syn",
"@crates.io//:unicode-ident",
],
)

Expand All @@ -87,6 +89,7 @@ rust_library(
"@crates.io//:quote",
"@crates.io//:scratch",
"@crates.io//:syn",
"@crates.io//:unicode-ident",
],
)

Expand All @@ -104,5 +107,6 @@ rust_library(
"@crates.io//:proc-macro2",
"@crates.io//:quote",
"@crates.io//:syn",
"@crates.io//:unicode-ident",
],
)
1 change: 1 addition & 0 deletions gen/build/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ proc-macro2 = { version = "1.0.74", default-features = false, features = ["span-
quote = { version = "1.0.35", default-features = false }
scratch = "1.0.5"
syn = { version = "2.0.46", default-features = false, features = ["clone-impls", "full", "parsing", "printing"] }
unicode-ident = "1.0.22"

[dev-dependencies]
cxx = { version = "1.0", path = "../.." }
Expand Down
1 change: 1 addition & 0 deletions gen/cmd/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ indexmap = "2.9.0"
proc-macro2 = { version = "1.0.74", default-features = false, features = ["span-locations"] }
quote = { version = "1.0.35", default-features = false }
syn = { version = "2.0.46", default-features = false, features = ["clone-impls", "full", "parsing", "printing"] }
unicode-ident = "1.0.22"

[package.metadata.docs.rs]
targets = ["x86_64-unknown-linux-gnu"]
1 change: 1 addition & 0 deletions gen/lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ indexmap = "2.9.0"
proc-macro2 = { version = "1.0.74", default-features = false, features = ["span-locations"] }
quote = { version = "1.0.35", default-features = false }
syn = { version = "2.0.46", default-features = false, features = ["clone-impls", "full", "parsing", "printing"] }
unicode-ident = "1.0.22"

[package.metadata.docs.rs]
targets = ["x86_64-unknown-linux-gnu"]
Expand Down
1 change: 1 addition & 0 deletions macro/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ indexmap = "2.9.0"
proc-macro2 = "1.0.74"
quote = "1.0.35"
syn = { version = "2.0.46", features = ["full"] }
unicode-ident = "1.0.22"

[dev-dependencies]
cxx = { version = "1.0", path = ".." }
Expand Down
84 changes: 77 additions & 7 deletions syntax/check.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,21 @@
use crate::syntax::atom::Atom::{self, *};
use crate::syntax::message::Message;
use crate::syntax::names::ForeignName;
use crate::syntax::report::Errors;
use crate::syntax::visit::{self, Visit};
use crate::syntax::{
error, ident, trivial, Api, Array, Enum, ExternFn, ExternType, FnKind, Impl, Lang, Lifetimes,
NamedType, Ptr, Receiver, Ref, Signature, SliceRef, Struct, Trait, Ty1, Type, TypeAlias, Types,
NamedType, Pair, Ptr, Receiver, Ref, Signature, SliceRef, Struct, Trait, Ty1, Type, TypeAlias,
Types,
};
use proc_macro2::{Delimiter, Group, Ident, TokenStream};
use proc_macro2::{Delimiter, Group, TokenStream};
use quote::{quote, ToTokens};
use std::collections::HashSet;
use std::fmt::Display;
use syn::{GenericParam, Generics, Lifetime};
use std::sync::LazyLock;
use syn::ext::IdentExt;
use syn::parse::Parser;
use syn::{GenericParam, Generics, Ident, Lifetime};

pub(crate) struct Check<'a> {
apis: &'a [Api],
Expand Down Expand Up @@ -331,7 +337,7 @@ fn check_type_fn(cx: &mut Check, ty: &Signature) {

fn check_api_struct(cx: &mut Check, strct: &Struct) {
let name = &strct.name;
check_reserved_name(cx, &name.rust);
check_type_name(cx, name);
check_lifetimes(cx, &strct.generics);

if strct.fields.is_empty() {
Expand Down Expand Up @@ -374,6 +380,7 @@ fn check_api_struct(cx: &mut Check, strct: &Struct) {
}

for field in &strct.fields {
check_name(cx, &field.name);
if let Type::Fn(_) = field.ty {
cx.error(
field,
Expand All @@ -388,7 +395,7 @@ fn check_api_struct(cx: &mut Check, strct: &Struct) {
}

fn check_api_enum(cx: &mut Check, enm: &Enum) {
check_reserved_name(cx, &enm.name.rust);
check_type_name(cx, &enm.name);
check_lifetimes(cx, &enm.generics);

if enm.variants.is_empty() && !enm.explicit_repr {
Expand All @@ -399,6 +406,10 @@ fn check_api_enum(cx: &mut Check, enm: &Enum) {
);
}

for variant in &enm.variants {
check_name(cx, &variant.name);
}

for derive in &enm.derives {
match derive.what {
Trait::BitAnd
Expand Down Expand Up @@ -434,7 +445,7 @@ fn check_api_enum(cx: &mut Check, enm: &Enum) {
}

fn check_api_type(cx: &mut Check, ety: &ExternType) {
check_reserved_name(cx, &ety.name.rust);
check_type_name(cx, &ety.name);
check_lifetimes(cx, &ety.generics);

for derive in &ety.derives {
Expand Down Expand Up @@ -468,6 +479,7 @@ fn check_api_type(cx: &mut Check, ety: &ExternType) {
}

fn check_api_fn(cx: &mut Check, efn: &ExternFn) {
check_fn_name(cx, &efn.name);
match efn.lang {
Lang::Cxx | Lang::CxxUnwind => {
if !efn.generics.params.is_empty() && !efn.trusted {
Expand Down Expand Up @@ -582,6 +594,7 @@ fn check_api_fn(cx: &mut Check, efn: &ExternFn) {
}

fn check_api_type_alias(cx: &mut Check, alias: &TypeAlias) {
check_type_name(cx, &alias.name);
check_lifetimes(cx, &alias.generics);

for derive in &alias.derives {
Expand Down Expand Up @@ -682,7 +695,62 @@ fn check_mut_return_restriction(cx: &mut Check, efn: &ExternFn) {
);
}

fn check_reserved_name(cx: &mut Check, ident: &Ident) {
/// Checks an API `name` (e.g. the name of a type, function, struct field or
/// enum variant) and reports an error through `cx` when its C++ spelling
/// cannot be used as a C++ identifier.
///
/// Two independent checks are applied to `name.cxx`:
///
/// * it must not be one of the C++ reserved keywords listed below, and
/// * it must parse as a single identifier.
///
/// Callers that accept other forms of names (e.g. `operator==` for function
/// names) must recognize those forms themselves *before* delegating to this
/// function; see `check_fn_name`.
fn check_name(cx: &mut Check, name: &Pair) {
    let cxx_name = name.cxx.as_str();

    // Errors are attached to a copy of the Rust identifier whose span has
    // been replaced with the span recorded for the C++ name.
    let mut report_error = move |msg: String| {
        let mut tokens = name.rust.clone();
        tokens.set_span(name.cxx.span());
        cx.error(tokens, msg);
    };

    #[rustfmt::skip]
    static CPP_RESERVED_KEYWORDS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
        // Taken from https://en.cppreference.com/w/cpp/keywords.html
        //
        // Every entry must be an actual keyword. In particular the table must
        // not contain empty strings, otherwise an empty name would be
        // misreported as a "reserved keyword".
        [
            "alignas", "alignof", "and", "and_eq", "asm", "atomic_cancel", "atomic_commit",
            "atomic_noexcept", "auto", "bitand", "bitor", "bool", "break", "case", "catch",
            "char", "char8_t", "char16_t", "char32_t", "class", "compl", "concept", "const",
            "consteval", "constexpr", "constinit", "const_cast", "continue", "contract_assert",
            "co_await", "co_return", "co_yield", "decltype", "default", "delete", "do",
            "double", "dynamic_cast", "else", "enum", "explicit", "export", "extern", "false",
            "float", "for", "friend", "goto", "if", "inline", "int", "long", "mutable",
            "namespace", "new", "noexcept", "not", "not_eq", "nullptr", "operator", "or",
            "or_eq", "private", "protected", "public", "reflexpr", "register",
            "reinterpret_cast", "requires", "return", "short", "signed", "sizeof", "static",
            "static_assert", "static_cast", "struct", "switch", "synchronized", "template",
            "this", "thread_local", "throw", "true", "try", "typedef", "typeid", "typename",
            "union", "unsigned", "using", "virtual", "void", "volatile", "wchar_t", "while",
            "xor", "xor_eq",
        ].into_iter().collect()
    });
    if CPP_RESERVED_KEYWORDS.contains(cxx_name) {
        let msg = format!("C++ reserved keyword can't be used as a C++ identifier: {cxx_name}");
        report_error(msg);
    }

    // Most API names need to have a form of a valid C++ identifier. API names
    // that allow other forms (e.g. `operator==` for function names) should
    // check and allow those forms first (e.g. by `check_fn_name`), before
    // deciding to call `check_name`.
    if let Err(e) = Ident::parse_any.parse_str(cxx_name) {
        let msg = format!("Invalid C++ identifier: {e}");
        report_error(msg);
    }
}

/// Validates the `name` of a function or a method.
///
/// A C++ name accepted by `ForeignName::is_valid_operator_name` (such as
/// `operator==`) is allowed as-is; any other name goes through the general
/// `check_name` validation.
fn check_fn_name(cx: &mut Check, name: &Pair) {
    let is_operator = ForeignName::is_valid_operator_name(name.cxx.as_str());
    if !is_operator {
        check_name(cx, name);
    }
}

/// Checks `name` of a type (e.g. a struct, enum, or a type alias).
fn check_type_name(cx: &mut Check, name: &Pair) {
let ident = &name.rust;
if ident == "Box"
|| ident == "UniquePtr"
|| ident == "SharedPtr"
Expand All @@ -694,6 +762,8 @@ fn check_reserved_name(cx: &mut Check, ident: &Ident) {
{
cx.error(ident, "reserved name");
}

check_name(cx, name);
}

fn check_reserved_lifetime(cx: &mut Check, lifetime: &Lifetime) {
Expand Down
42 changes: 39 additions & 3 deletions syntax/names.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
use crate::syntax::symbol::Segment;
use crate::syntax::{Lifetimes, NamedType, Pair, Symbol};
use proc_macro2::{Ident, Span};
use std::collections::HashSet;
use std::fmt::{self, Display};
use std::iter;
use std::sync::LazyLock;
use syn::ext::IdentExt;
use syn::parse::{Error, Parser, Result};
use syn::punctuated::Punctuated;

/// The C++-side spelling of a name, paired with the span it was parsed with.
///
/// Values are created by `ForeignName::parse`, which accepts either a single
/// identifier or an `operator…` name recognized by
/// `ForeignName::is_valid_operator_name`.
#[derive(Clone)]
pub(crate) struct ForeignName {
/// The name text; exposed through `as_str()`.
text: String,
/// The span supplied to `parse`; exposed through `span()`.
span: Span,
}

impl Pair {
Expand All @@ -36,16 +39,49 @@ impl NamedType {

impl ForeignName {
pub(crate) fn parse(text: &str, span: Span) -> Result<Self> {
// TODO: support C++ names containing whitespace (`unsigned int`) or
// non-alphanumeric characters (`operator++`).
if ForeignName::is_valid_operator_name(text) {
return Ok(ForeignName {
text: text.to_string(),
span,
});
}

match Ident::parse_any.parse_str(text) {
Ok(ident) => {
let text = ident.to_string();
Ok(ForeignName { text })
Ok(ForeignName { text, span })
}
Err(err) => Err(Error::new(span, err)),
}
}

/// Returns the stored name text as a string slice.
pub(crate) fn as_str(&self) -> &str {
&self.text
}

/// Returns the span stored alongside the name text.
pub(crate) fn span(&self) -> Span {
self.span
}

/// Reports whether `name` is the name of an overloadable C++ operator
/// function, e.g. `operator==` or `operator new[]`.
///
/// The match is exact: `name` must be the literal prefix `operator`
/// followed by one of the spellings in the table below. Keyword operators
/// carry exactly one leading space (`operator new`), symbolic operators
/// follow the prefix directly (`operator==`).
pub(crate) fn is_valid_operator_name(name: &str) -> bool {
    #[rustfmt::skip]
    static CPP_OPERATORS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
        // Based on `llvm/llvm-project/clang/include/clang/Basic/OperatorKinds.def`.
        // `?` is left out on purpose: it cannot be overloaded.
        //
        // TODO: Consider also allowing `operator <type>`
        // (see https://en.cppreference.com/w/cpp/language/cast_operator.html).
        HashSet::from([
            " new", " delete", " new[]", " delete[]", " co_await",
            "+", "-", "*", "/", "%", "^", "&", "|", "~", "!", "=", "<", ">",
            "+=", "-=", "*=", "/=", "%=", "^=", "&=", "|=",
            "<<", ">>", "<<=", ">>=", "==", "!=", "<=", ">=", "<=>",
            "&&", "||", "++", "--", ",", "->*", "->", "()", "[]",
        ])
    });

    match name.strip_prefix("operator") {
        Some(operator) => CPP_OPERATORS.contains(operator),
        None => false,
    }
}
}

impl Display for ForeignName {
Expand Down
61 changes: 58 additions & 3 deletions syntax/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,34 @@ impl Segment for Pair {

impl Segment for ForeignName {
fn write(&self, symbol: &mut Symbol) {
// TODO: support C++ names containing whitespace (`unsigned int`) or
// non-alphanumeric characters (`operator++`).
self.to_string().write(symbol);
/// Rewrites an arbitrary C++ name (e.g. `operator==`) so that it only
/// contains characters permitted in an identifier.
///
/// Encoding scheme, applied character by character:
///
/// * `_` becomes `_u`;
/// * a character that is not allowed at its position (XID_Start for the
///   first character, XID_Continue afterwards) becomes `_<hex>h`, where
///   `<hex>` is its code point in lowercase hexadecimal;
/// * every other character is copied unchanged.
///
/// The mapping must stay an
/// [injective function](https://en.wikipedia.org/wiki/Injective_function):
/// distinct `name`s have to produce distinct results. Escaping `_` itself
/// is what guarantees that a literal `_3dh` in the input cannot collide
/// with an escaped `=`.
fn escape(name: &str) -> String {
    let mut escaped = String::with_capacity(name.len());
    for (position, c) in name.chars().enumerate() {
        let allowed_here = if position == 0 {
            unicode_ident::is_xid_start(c)
        } else {
            unicode_ident::is_xid_continue(c)
        };
        match c {
            // The underscore is the escape introducer, so it is always
            // escaped regardless of whether it would be allowed here.
            '_' => escaped.push_str("_u"),
            _ if allowed_here => escaped.push(c),
            _ => write!(&mut escaped, "_{:x}h", u32::from(c)).unwrap(),
        }
    }
    escaped
}

escape(self.as_str()).write(symbol);
}
}

Expand All @@ -114,3 +139,33 @@ pub(crate) fn join(segments: &[&dyn Segment]) -> Symbol {
assert!(!symbol.0.is_empty());
symbol
}

#[cfg(test)]
mod test {
    use super::join;
    use crate::syntax::ForeignName;
    use proc_macro2::Span;

    /// Each case parses a foreign name, mangles it via `join`, and compares
    /// the resulting symbol text with the expected string.
    #[test]
    fn test_impl_segment_for_foreign_name() {
        let cases = [
            // Plain identifiers pass through untouched.
            ("foo", "foo"),
            // Escaping of non-identifier characters like `=`.
            ("operator==", "operator_3dh_3dh"),
            // Feeble attempt of testing injectivity: `_` has to be escaped
            // as well, otherwise this input would collide with the mangled
            // form of the previous case.
            ("operator_3dh_3dh", "operator_u3dh_u3dh"),
        ];

        for (foreign_name_str, expected_symbol_str) in cases {
            let foreign_name = ForeignName::parse(foreign_name_str, Span::call_site()).unwrap();
            let actual_symbol_str = join(&[&foreign_name]).to_string();
            assert_eq!(
                actual_symbol_str, expected_symbol_str,
                "Expecting `{foreign_name_str}` to mangle as `{expected_symbol_str}` \
                 but got `{actual_symbol_str}` instead.",
            );
        }
    }
}
Loading