Skip to content

Commit 8bf0b75

Browse files
committed
Support #[cxx_name = "operator+="], etc. (i.e. not just C identifiers).
This came up in https://crrev.com/c/7253816/comment/3f283bcf_9ab0850a/ and in #109 (comment)
1 parent 89dfc24 commit 8bf0b75

File tree

20 files changed

+240
-8
lines changed

20 files changed

+240
-8
lines changed

BUCK

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ rust_binary(
4747
"//third-party:proc-macro2",
4848
"//third-party:quote",
4949
"//third-party:syn",
50+
"//third-party:unicode-ident",
5051
],
5152
)
5253

@@ -76,6 +77,7 @@ rust_library(
7677
"//third-party:quote",
7778
"//third-party:rustversion",
7879
"//third-party:syn",
80+
"//third-party:unicode-ident",
7981
],
8082
)
8183

@@ -101,6 +103,7 @@ rust_library(
101103
"//third-party:quote",
102104
"//third-party:scratch",
103105
"//third-party:syn",
106+
"//third-party:unicode-ident",
104107
],
105108
)
106109

@@ -125,5 +128,6 @@ rust_library(
125128
"//third-party:proc-macro2",
126129
"//third-party:quote",
127130
"//third-party:syn",
131+
"//third-party:unicode-ident",
128132
],
129133
)

BUILD.bazel

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ rust_binary(
3939
"@crates.io//:proc-macro2",
4040
"@crates.io//:quote",
4141
"@crates.io//:syn",
42+
"@crates.io//:unicode-ident",
4243
],
4344
)
4445

@@ -70,6 +71,7 @@ rust_proc_macro(
7071
"@crates.io//:proc-macro2",
7172
"@crates.io//:quote",
7273
"@crates.io//:syn",
74+
"@crates.io//:unicode-ident",
7375
],
7476
)
7577

@@ -87,6 +89,7 @@ rust_library(
8789
"@crates.io//:quote",
8890
"@crates.io//:scratch",
8991
"@crates.io//:syn",
92+
"@crates.io//:unicode-ident",
9093
],
9194
)
9295

@@ -104,5 +107,6 @@ rust_library(
104107
"@crates.io//:proc-macro2",
105108
"@crates.io//:quote",
106109
"@crates.io//:syn",
110+
"@crates.io//:unicode-ident",
107111
],
108112
)

gen/build/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ proc-macro2 = { version = "1.0.74", default-features = false, features = ["span-
2424
quote = { version = "1.0.35", default-features = false }
2525
scratch = "1.0.5"
2626
syn = { version = "2.0.46", default-features = false, features = ["clone-impls", "full", "parsing", "printing"] }
27+
unicode-ident = "1.0.22"
2728

2829
[dev-dependencies]
2930
cxx = { version = "1.0", path = "../.." }

gen/cmd/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ indexmap = "2.9.0"
2323
proc-macro2 = { version = "1.0.74", default-features = false, features = ["span-locations"] }
2424
quote = { version = "1.0.35", default-features = false }
2525
syn = { version = "2.0.46", default-features = false, features = ["clone-impls", "full", "parsing", "printing"] }
26+
unicode-ident = "1.0.22"
2627

2728
[package.metadata.docs.rs]
2829
targets = ["x86_64-unknown-linux-gnu"]

gen/lib/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ indexmap = "2.9.0"
1818
proc-macro2 = { version = "1.0.74", default-features = false, features = ["span-locations"] }
1919
quote = { version = "1.0.35", default-features = false }
2020
syn = { version = "2.0.46", default-features = false, features = ["clone-impls", "full", "parsing", "printing"] }
21+
unicode-ident = "1.0.22"
2122

2223
[package.metadata.docs.rs]
2324
targets = ["x86_64-unknown-linux-gnu"]

macro/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ indexmap = "2.9.0"
2020
proc-macro2 = "1.0.74"
2121
quote = "1.0.35"
2222
syn = { version = "2.0.46", features = ["full"] }
23+
unicode-ident = "1.0.22"
2324

2425
[dev-dependencies]
2526
cxx = { version = "1.0", path = ".." }

syntax/check.rs

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use crate::syntax::atom::Atom::{self, *};
22
use crate::syntax::message::Message;
3+
use crate::syntax::names::ForeignName;
34
use crate::syntax::report::Errors;
45
use crate::syntax::visit::{self, Visit};
56
use crate::syntax::{
@@ -12,7 +13,9 @@ use quote::{quote, ToTokens};
1213
use std::collections::HashSet;
1314
use std::fmt::Display;
1415
use std::sync::LazyLock;
15-
use syn::{GenericParam, Generics, Lifetime};
16+
use syn::ext::IdentExt;
17+
use syn::parse::Parser;
18+
use syn::{GenericParam, Generics, Ident, Lifetime};
1619

1720
pub(crate) struct Check<'a> {
1821
apis: &'a [Api],
@@ -476,7 +479,7 @@ fn check_api_type(cx: &mut Check, ety: &ExternType) {
476479
}
477480

478481
fn check_api_fn(cx: &mut Check, efn: &ExternFn) {
479-
check_name(cx, &efn.name);
482+
check_fn_name(cx, &efn.name);
480483
match efn.lang {
481484
Lang::Cxx | Lang::CxxUnwind => {
482485
if !efn.generics.params.is_empty() && !efn.trusted {
@@ -725,6 +728,24 @@ fn check_name(cx: &mut Check, name: &Pair) {
725728
let msg = format!("C++ reserved keyword can't be used as a C++ identifier: {cxx_name}");
726729
report_error(msg);
727730
}
731+
732+
// Most API names need to have a form of a valid C++ identifier. API names
733+
// that allow other forms (e.g. `operator==` for function names) should
734+
// check and allow those forms first (e.g. by `check_fn_name`), before
735+
// deciding to call `check_name`.
736+
if let Err(e) = Ident::parse_any.parse_str(cxx_name) {
737+
let msg = format!("Invalid C++ identifier: {e}");
738+
report_error(msg);
739+
}
740+
}
741+
742+
/// Checks `name` of a function or a method.
743+
fn check_fn_name(cx: &mut Check, name: &Pair) {
744+
if ForeignName::is_valid_operator_name(name.cxx.as_str()) {
745+
return;
746+
}
747+
748+
check_name(cx, name);
728749
}
729750

730751
/// Checks `name` of a type (e.g. a struct, enum, or a type alias).

syntax/names.rs

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
use crate::syntax::symbol::Segment;
22
use crate::syntax::{Lifetimes, NamedType, Pair, Symbol};
33
use proc_macro2::{Ident, Span};
4+
use std::collections::HashSet;
45
use std::fmt::{self, Display};
56
use std::iter;
7+
use std::sync::LazyLock;
68
use syn::ext::IdentExt;
79
use syn::parse::{Error, Parser, Result};
810
use syn::punctuated::Punctuated;
@@ -37,8 +39,13 @@ impl NamedType {
3739

3840
impl ForeignName {
3941
pub(crate) fn parse(text: &str, span: Span) -> Result<Self> {
40-
// TODO: support C++ names containing whitespace (`unsigned int`) or
41-
// non-alphanumeric characters (`operator++`).
42+
if ForeignName::is_valid_operator_name(text) {
43+
return Ok(ForeignName {
44+
text: text.to_string(),
45+
span,
46+
});
47+
}
48+
4249
match Ident::parse_any.parse_str(text) {
4350
Ok(ident) => {
4451
let text = ident.to_string();
@@ -55,6 +62,26 @@ impl ForeignName {
5562
pub(crate) fn span(&self) -> Span {
5663
self.span
5764
}
65+
66+
pub(crate) fn is_valid_operator_name(name: &str) -> bool {
67+
#[rustfmt::skip]
68+
static CPP_OPERATORS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
69+
// Based on `llvm/llvm-project/clang/include/clang/Basic/OperatorKinds.def`.
70+
// Excluding `?` because it is not overridable.
71+
//
72+
// TODO: Consider also allowing `operator <type>`
73+
// (see https://en.cppreference.com/w/cpp/language/cast_operator.html).
74+
[
75+
" new", " delete", " new[]", " delete[]", " co_await",
76+
"+", "-", "*", "/", "%", "^", "&", "|", "~", "!", "=", "<", ">",
77+
"+=", "-=", "*=", "/=", "%=", "^=", "&=", "|=",
78+
"<<", ">>", "<<=", ">>=", "==", "!=", "<=", ">=", "<=>",
79+
"&&", "||", "++", "--", ",", "->*", "->", "()", "[]",
80+
].into_iter().collect()
81+
});
82+
name.strip_prefix("operator")
83+
.is_some_and(|suffix| CPP_OPERATORS.contains(suffix))
84+
}
5885
}
5986

6087
impl Display for ForeignName {

syntax/symbol.rs

Lines changed: 58 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,34 @@ impl Segment for Pair {
9191

9292
impl Segment for ForeignName {
9393
fn write(&self, symbol: &mut Symbol) {
94-
// TODO: support C++ names containing whitespace (`unsigned int`) or
95-
// non-alphanumeric characters (`operator++`).
96-
self.to_string().write(symbol);
94+
/// Escapes arbitrary C++ name (e.g. `operator==`) into a String
95+
/// that is a valid C identifier. It is important that this is an
96+
/// [injective function](https://en.wikipedia.org/wiki/Injective_function)
97+
/// (i.e. distinct `name`s need to map to distinct results).
98+
fn escape(name: &str) -> String {
99+
let mut result = String::with_capacity(name.len());
100+
for (index, ch) in name.chars().enumerate() {
101+
if ch == '_' {
102+
write!(&mut result, "_u").unwrap();
103+
continue;
104+
}
105+
106+
let should_escape = if index == 0 {
107+
!unicode_ident::is_xid_start(ch)
108+
} else {
109+
!unicode_ident::is_xid_continue(ch)
110+
};
111+
if should_escape {
112+
write!(&mut result, "_{:x}h", ch as u32).unwrap();
113+
continue;
114+
}
115+
116+
write!(&mut result, "{ch}").unwrap();
117+
}
118+
result
119+
}
120+
121+
escape(self.as_str()).write(symbol);
97122
}
98123
}
99124

@@ -114,3 +139,33 @@ pub(crate) fn join(segments: &[&dyn Segment]) -> Symbol {
114139
assert!(!symbol.0.is_empty());
115140
symbol
116141
}
142+
143+
#[cfg(test)]
144+
mod test {
145+
use super::join;
146+
use crate::syntax::ForeignName;
147+
use proc_macro2::Span;
148+
149+
#[test]
150+
fn test_impl_segment_for_foreign_name() {
151+
fn t(foreign_name_str: &str, expected_symbol_str: &str) {
152+
let foreign_name = ForeignName::parse(foreign_name_str, Span::call_site()).unwrap();
153+
let symbol = join(&[&foreign_name]);
154+
let actual_symbol_str = symbol.to_string();
155+
assert_eq!(
156+
actual_symbol_str, expected_symbol_str,
157+
"Expecting `{foreign_name_str}` to mangle as `{expected_symbol_str}` \
158+
but got `{actual_symbol_str}` instead.",
159+
);
160+
}
161+
162+
t("foo", "foo");
163+
164+
// Escaping of non-identifier characters like `=`.
165+
t("operator==", "operator_3dh_3dh");
166+
167+
// Feeble attempt of testing injectivity
168+
// (need to escape `_` to avoid a conflict with result of the previous test).
169+
t("operator_3dh_3dh", "operator_u3dh_u3dh");
170+
}
171+
}

tests/ffi/lib.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,9 @@ pub mod ffi {
234234
#[rust_name = "str_overloaded_function"]
235235
fn cOverloadedFunction(x: &str) -> String;
236236

237+
#[cxx_name = "operator=="]
238+
fn cpp_eq_operator(&self, other_n: usize) -> bool;
239+
237240
#[namespace = "other"]
238241
fn ns_c_take_ns_shared(shared: AShared);
239242

@@ -352,6 +355,9 @@ pub mod ffi {
352355
#[cxx_name = "rAliasedFunction"]
353356
fn r_aliased_function(x: i32) -> String;
354357

358+
#[cxx_name = "operator=="]
359+
fn r_operator_eq_for_usize_operand(self: &R, rhs: usize) -> bool;
360+
355361
#[Self = "Shared"]
356362
fn r_static_method_on_shared() -> usize;
357363

@@ -473,6 +479,10 @@ impl R {
473479
fn r_static_method() -> usize {
474480
2024
475481
}
482+
483+
fn r_operator_eq_for_usize_operand(&self, rhs: usize) -> bool {
484+
self.0 == rhs
485+
}
476486
}
477487

478488
pub struct Reference<'a>(pub &'a String);

0 commit comments

Comments
 (0)