Skip to content

Commit 61eb16d

Browse files
authored
Minor optimizations to assembler-generated code (#10811)
* Minor optimizations to assembler-generated code This is a few minor changes in the hopes of optimizing the compile-time of the generated code itself for the x64 assembler crate, including: * Add `derive(Copy)` to all instructions to benefit from a specialized implementation of `derive(Clone)` when a `Copy` implementation is also present (e.g. it's `*foo` instead of a structural `clone`-each-field). * Don't use `write!` in `Display for Inst` and instead delegate directly with methods to avoid formatting machinery. * Use helper methods in `RegisterVisitor` trait to have register allocation be a method-per-operand and shrink the methods a bit. * Fix clippy warnings
1 parent f2eff68 commit 61eb16d

File tree

8 files changed

+76
-48
lines changed

8 files changed

+76
-48
lines changed

cranelift/assembler-x64/meta/src/generate.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ fn generate_inst_enum(f: &mut Formatter, insts: &[dsl::Inst]) {
4747

4848
/// `#[derive(...)]`
4949
fn generate_derive(f: &mut Formatter) {
50-
fmtln!(f, "#[derive(Clone, Debug)]");
50+
fmtln!(f, "#[derive(Copy, Clone, Debug)]");
5151
fmtln!(
5252
f,
5353
"#[cfg_attr(any(test, feature = \"fuzz\"), derive(arbitrary::Arbitrary))]"
@@ -72,7 +72,7 @@ fn generate_inst_display_impl(f: &mut Formatter, insts: &[dsl::Inst]) {
7272
f.add_block("match self", |f| {
7373
for inst in insts {
7474
let variant_name = inst.name();
75-
fmtln!(f, "Self::{variant_name}(i) => write!(f, \"{{i}}\"),");
75+
fmtln!(f, "Self::{variant_name}(i) => i.fmt(f),");
7676
}
7777
});
7878
},

cranelift/assembler-x64/meta/src/generate/features.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ impl dsl::Feature {
1111
pub fn generate_enum(f: &mut Formatter) {
1212
fmtln!(f, "#[doc(hidden)]");
1313
generate_derive(f);
14-
fmtln!(f, "#[derive(Copy, PartialEq)]"); // Add a couple more helpful derives.
14+
fmtln!(f, "#[derive(PartialEq)]"); // Add more helpful derives.
1515
f.add_block("pub enum Feature", |f| {
1616
for feature in dsl::ALL_FEATURES {
1717
fmtln!(f, "{feature},");

cranelift/assembler-x64/meta/src/generate/inst.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -158,13 +158,14 @@ impl dsl::Inst {
158158
RegMem(loc) => {
159159
let reg = reg.unwrap();
160160
let reg_lower = reg.to_string().to_lowercase();
161-
f.add_block(&format!("match &mut self.{loc}"), |f| {
162-
fmtln!(f, "{reg}Mem::{reg}(r) => visitor.{mutability}_{reg_lower}(r),");
163-
fmtln!(f, "{reg}Mem::Mem(m) => visit_amode(m, visitor),");
164-
});
161+
fmtln!(f, "visitor.{mutability}_{reg_lower}_mem(&mut self.{loc});");
165162
}
166163
Mem(loc) => {
167-
fmtln!(f, "visit_amode(&mut self.{loc}, visitor);");
164+
// Note that this is always "read" because from a
165+
// regalloc perspective when using an amode it means
166+
// that the while a write is happening that's to
167+
// memory, not registers.
168+
fmtln!(f, "visitor.read_amode(&mut self.{loc});");
168169
}
169170
}
170171
}

cranelift/assembler-x64/src/api.rs

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
33
use crate::gpr;
44
use crate::xmm;
5+
use crate::{Amode, GprMem, XmmMem};
56
use std::{num::NonZeroU8, ops::Index, vec::Vec};
67

78
/// Describe how an instruction is emitted into a code buffer.
@@ -67,12 +68,12 @@ impl CodeSink for Vec<u8> {
6768
}
6869

6970
/// Wrap [`CodeSink`]-specific labels.
70-
#[derive(Debug, Clone)]
71+
#[derive(Debug, Copy, Clone)]
7172
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
7273
pub struct Label(pub u32);
7374

7475
/// Wrap [`CodeSink`]-specific constant keys.
75-
#[derive(Debug, Clone)]
76+
#[derive(Debug, Copy, Clone)]
7677
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
7778
pub struct Constant(pub u32);
7879

@@ -125,7 +126,7 @@ pub trait Registers {
125126
}
126127

127128
/// Describe how to interact with an external register type.
128-
pub trait AsReg: Clone + std::fmt::Debug {
129+
pub trait AsReg: Copy + Clone + std::fmt::Debug {
129130
/// Create a register from its hardware encoding.
130131
///
131132
/// This is primarily useful for fuzzing, though it is also useful for
@@ -194,4 +195,46 @@ pub trait RegisterVisitor<R: Registers> {
194195
/// Visit a read-only fixed SSE register; this register can be modified
195196
/// in-place but must emit as the hardware encoding `enc`.
196197
fn fixed_write_xmm(&mut self, reg: &mut R::WriteXmm, enc: u8);
198+
199+
/// Visit the registers in an [`Amode`].
200+
///
201+
/// This is helpful for generated code: it allows capturing the `R::ReadGpr`
202+
/// type (which an `Amode` method cannot) and simplifies the code to be
203+
/// generated.
204+
fn read_amode(&mut self, amode: &mut Amode<R::ReadGpr>) {
205+
match amode {
206+
Amode::ImmReg { base, .. } => {
207+
self.read_gpr(base);
208+
}
209+
Amode::ImmRegRegShift { base, index, .. } => {
210+
self.read_gpr(base);
211+
self.read_gpr(index.as_mut());
212+
}
213+
Amode::RipRelative { .. } => {}
214+
}
215+
}
216+
217+
/// Helper method to handle a read/write [`GprMem`] operand.
218+
fn read_write_gpr_mem(&mut self, op: &mut GprMem<R::ReadWriteGpr, R::ReadGpr>) {
219+
match op {
220+
GprMem::Gpr(r) => self.read_write_gpr(r),
221+
GprMem::Mem(m) => self.read_amode(m),
222+
}
223+
}
224+
225+
/// Helper method to handle a read-only [`GprMem`] operand.
226+
fn read_gpr_mem(&mut self, op: &mut GprMem<R::ReadGpr, R::ReadGpr>) {
227+
match op {
228+
GprMem::Gpr(r) => self.read_gpr(r),
229+
GprMem::Mem(m) => self.read_amode(m),
230+
}
231+
}
232+
233+
/// Helper method to handle a read-only [`XmmMem`] operand.
234+
fn read_xmm_mem(&mut self, op: &mut XmmMem<R::ReadXmm, R::ReadGpr>) {
235+
match op {
236+
XmmMem::Xmm(r) => self.read_xmm(r),
237+
XmmMem::Mem(m) => self.read_amode(m),
238+
}
239+
}
197240
}

cranelift/assembler-x64/src/fixed.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use crate::AsReg;
2222
/// let fixed = Fixed::<u8, { gpr::enc::RAX }>(invalid_reg);
2323
/// fixed.enc(); // Will panic because `invalid_reg` does not match `RAX`.
2424
/// ```
25-
#[derive(Clone, Debug)]
25+
#[derive(Copy, Clone, Debug)]
2626
pub struct Fixed<R, const E: u8>(pub R);
2727

2828
impl<R, const E: u8> Fixed<R, E> {
@@ -52,3 +52,9 @@ impl<R, const E: u8> AsRef<R> for Fixed<R, E> {
5252
&self.0
5353
}
5454
}
55+
56+
impl<R, const E: u8> From<R> for Fixed<R, E> {
57+
fn from(reg: R) -> Fixed<R, E> {
58+
Fixed(reg)
59+
}
60+
}

cranelift/assembler-x64/src/imm.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ impl TryFrom<i32> for Simm8 {
107107
}
108108

109109
/// A 16-bit immediate operand.
110-
#[derive(Clone, Debug)]
110+
#[derive(Copy, Clone, Debug)]
111111
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
112112
pub struct Imm16(u16);
113113

@@ -147,7 +147,7 @@ impl fmt::Display for Imm16 {
147147
}
148148

149149
/// A _signed_ 16-bit immediate operand (suitable for sign extension).
150-
#[derive(Clone, Debug)]
150+
#[derive(Copy, Clone, Debug)]
151151
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
152152
pub struct Simm16(i16);
153153

@@ -196,7 +196,7 @@ impl TryFrom<i32> for Simm16 {
196196
/// Note that, "in 64-bit mode, the typical size of immediate operands remains
197197
/// 32 bits. When the operand size is 64 bits, the processor sign-extends all
198198
/// immediates to 64 bits prior to their use" (Intel SDM Vol. 2, 2.2.1.5).
199-
#[derive(Clone, Debug)]
199+
#[derive(Copy, Clone, Debug)]
200200
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
201201
pub struct Imm32(u32);
202202

@@ -240,7 +240,7 @@ impl fmt::Display for Imm32 {
240240
/// Note that, "in 64-bit mode, the typical size of immediate operands remains
241241
/// 32 bits. When the operand size is 64 bits, the processor sign-extends all
242242
/// immediates to 64 bits prior to their use" (Intel SDM Vol. 2, 2.2.1.5).
243-
#[derive(Clone, Debug)]
243+
#[derive(Copy, Clone, Debug)]
244244
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
245245
pub struct Simm32(i32);
246246

cranelift/assembler-x64/src/inst.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use crate::Fixed;
77
use crate::api::{AsReg, CodeSink, KnownOffsetTable, RegisterVisitor, Registers};
88
use crate::gpr::{self, Gpr, Size};
99
use crate::imm::{Extension, Imm8, Imm16, Imm32, Simm8, Simm32};
10-
use crate::mem::{Amode, GprMem, XmmMem, visit_amode};
10+
use crate::mem::{Amode, GprMem, XmmMem};
1111
use crate::rex::RexPrefix;
1212
use crate::xmm::Xmm;
1313

cranelift/assembler-x64/src/mem.rs

Lines changed: 9 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,9 @@
33
use crate::api::{AsReg, CodeSink, Constant, KnownOffset, KnownOffsetTable, Label, TrapCode};
44
use crate::gpr::{self, NonRspGpr, Size};
55
use crate::rex::{Disp, RexPrefix, encode_modrm, encode_sib};
6-
use crate::{RegisterVisitor, Registers};
76

87
/// x64 memory addressing modes.
9-
#[derive(Clone, Debug)]
8+
#[derive(Copy, Clone, Debug)]
109
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
1110
pub enum Amode<R: AsReg> {
1211
ImmReg {
@@ -62,27 +61,6 @@ impl<R: AsReg> Amode<R> {
6261
}
6362
}
6463

65-
/// Visit the registers in an [`Amode`].
66-
///
67-
/// This is helpful for generated code: it allows capturing the `R::ReadGpr`
68-
/// type (which an `Amode` method cannot) and simplifies the code to be
69-
/// generated.
70-
pub(crate) fn visit_amode<R: Registers>(
71-
amode: &mut Amode<R::ReadGpr>,
72-
visitor: &mut impl RegisterVisitor<R>,
73-
) {
74-
match amode {
75-
Amode::ImmReg { base, .. } => {
76-
visitor.read_gpr(base);
77-
}
78-
Amode::ImmRegRegShift { base, index, .. } => {
79-
visitor.read_gpr(base);
80-
visitor.read_gpr(index.as_mut());
81-
}
82-
Amode::RipRelative { .. } => {}
83-
}
84-
}
85-
8664
/// A 32-bit immediate for address offsets.
8765
#[derive(Clone, Copy, Debug)]
8866
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
@@ -134,7 +112,7 @@ impl std::fmt::LowerHex for AmodeOffset {
134112
/// happens immediately before emission:
135113
/// - the [`KnownOffset`] is looked up, mapping it to an offset value
136114
/// - the [`Simm32`] value is added to the offset value
137-
#[derive(Clone, Debug)]
115+
#[derive(Copy, Clone, Debug)]
138116
pub struct AmodeOffsetPlusKnownOffset {
139117
pub simm32: AmodeOffset,
140118
pub offset: Option<KnownOffset>,
@@ -166,7 +144,7 @@ impl std::fmt::LowerHex for AmodeOffsetPlusKnownOffset {
166144
}
167145

168146
/// For RIP-relative addressing, keep track of the [`CodeSink`]-specific target.
169-
#[derive(Clone, Debug)]
147+
#[derive(Copy, Clone, Debug)]
170148
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
171149
pub enum DeferredTarget {
172150
Label(Label),
@@ -205,7 +183,7 @@ impl<R: AsReg> std::fmt::Display for Amode<R> {
205183
}
206184

207185
/// The scaling factor for the index register in certain [`Amode`]s.
208-
#[derive(Clone, Debug)]
186+
#[derive(Copy, Clone, Debug)]
209187
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
210188
pub enum Scale {
211189
One,
@@ -252,7 +230,7 @@ impl Scale {
252230
}
253231

254232
/// A general-purpose register or memory operand.
255-
#[derive(Clone, Debug)]
233+
#[derive(Copy, Clone, Debug)]
256234
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
257235
#[allow(
258236
clippy::module_name_repetitions,
@@ -313,7 +291,7 @@ impl<R: AsReg, M: AsReg> From<Amode<M>> for GprMem<R, M> {
313291
}
314292

315293
/// An XMM register or memory operand.
316-
#[derive(Clone, Debug)]
294+
#[derive(Copy, Clone, Debug)]
317295
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
318296
#[allow(
319297
clippy::module_name_repetitions,
@@ -382,7 +360,7 @@ fn emit_modrm_sib_disp<R: AsReg>(
382360
bytes_at_end: u8,
383361
evex_scaling: Option<i8>,
384362
) {
385-
match mem_e.clone() {
363+
match *mem_e {
386364
Amode::ImmReg { simm32, base, .. } => {
387365
let enc_e = base.enc();
388366
let mut imm = Disp::new(simm32.value(offsets), evex_scaling);
@@ -450,8 +428,8 @@ fn emit_modrm_sib_disp<R: AsReg>(
450428

451429
let offset = sink.current_offset();
452430
let target = match target {
453-
DeferredTarget::Label(label) => label.clone(),
454-
DeferredTarget::Constant(constant) => sink.get_label_for_constant(constant.clone()),
431+
DeferredTarget::Label(label) => label,
432+
DeferredTarget::Constant(constant) => sink.get_label_for_constant(constant),
455433
};
456434
sink.use_label_at_offset(offset, target);
457435

0 commit comments

Comments
 (0)