// data.rs
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is dual-licensed under either the MIT license found in the
* LICENSE-MIT file in the root directory of this source tree or the Apache
* License, Version 2.0 found in the LICENSE-APACHE file in the root directory
* of this source tree. You may select, at your option, one of the
* above-listed licenses.
*/
use std::collections::BTreeMap;
use std::hash::Hash;
use std::hash::Hasher;
use allocative::Allocative;
use buck2_data::ToProtoMessage;
use buck2_hash::BuckHasher;
use buck2_util::strong_hasher::Blake3StrongHasher;
use dupe::Dupe;
use equivalent::Equivalent;
use once_cell::sync::Lazy;
use once_cell::sync::OnceCell;
use pagable::Pagable;
use serde::Serialize;
use serde::Serializer;
use static_interner::Intern;
use static_interner::InternDisposition;
use static_interner::interner;
use strong_hash::StrongHash;
use crate::configuration::bound_id::BoundConfigurationId;
use crate::configuration::bound_label::BoundConfigurationLabel;
use crate::configuration::builtin::BuiltinPlatform;
use crate::configuration::constraints::ConstraintKey;
use crate::configuration::constraints::ConstraintValue;
use crate::configuration::hash::ConfigurationHash;
use crate::event::EVENT_DISPATCH;
/// Whether to include `is_marked_as_exec_platform` in the configuration hash.
/// When true, execution platforms will have a different hash from target platforms
/// with the same constraints. Default is false for backwards compatibility.
///
/// The cell is written through `init_hash_cfg_with_exec_platform`, which fails on a
/// second write. `HashedConfigurationPlatform::new` reads it and treats an unset cell
/// as `false`.
pub static HASH_CFG_WITH_EXEC_PLATFORM: OnceCell<bool> = OnceCell::new();
/// Stores the process-wide `HASH_CFG_WITH_EXEC_PLATFORM` setting.
///
/// `value` of `None` is stored as `false`.
///
/// # Errors
///
/// Returns a `Tier0`-tagged error when the cell already holds a value.
pub fn init_hash_cfg_with_exec_platform(value: Option<bool>) -> buck2_error::Result<()> {
    let enabled = value.unwrap_or(false);
    match HASH_CFG_WITH_EXEC_PLATFORM.set(enabled) {
        Ok(()) => Ok(()),
        // The rejected value carried by the error is not interesting; only report the double init.
        Err(_) => Err(buck2_error::buck2_error!(
            buck2_error::ErrorTag::Tier0,
            "HASH_CFG_WITH_EXEC_PLATFORM is already initialized"
        )),
    }
}
/// Errors produced when a `ConfigurationData` is asked for its label or its
/// constraint data but the underlying platform cannot provide it.
#[derive(Debug, buck2_error::Error)]
#[buck2(input)]
enum ConfigurationError {
/// Returned by `ConfigurationData::data` for every builtin platform other than
/// `UnspecifiedExec` (which has its own variant); `{0}` is that builtin platform.
#[error(
"Attempted to access the configuration data for the {0} platform. \
This platform is used when the global default platform is unspecified \
and in that case configuration features (like `select()`) are unsupported."
)]
Builtin(BuiltinPlatform),
/// Returned by `ConfigurationData::label` when the platform is not a bound one;
/// the payload is the configuration rendered with `to_string()`.
#[error("Platform is not bound: {0}")]
NotBound(String),
/// Returned by `ConfigurationData::data` for the builtin `UnspecifiedExec` platform.
#[error(
"Attempted to access the configuration data for the \"unspecified_exec\" platform. This platform is used when no execution platform was resolved for a target."
)]
UnspecifiedExec,
}
/// Errors produced by `ConfigurationData::lookup_bound`.
#[derive(Debug, buck2_error::Error)]
#[buck2(input)]
enum ConfigurationLookupError {
/// No interned configuration has the requested output hash.
// NOTE: the message literal below spans several source lines without `\`
// continuations, so its line breaks (and any leading whitespace on those lines)
// are part of the emitted text.
#[error("
Could not find configuration `{0}`. Configuration lookup by string requires
that buck has already loaded the configuration through some other mechanism. You can run `buck2 cquery <some_target>`
with a target that uses the configuration (somewhere in its graph) to make buck aware of the configuration first.
")]
ConfigNotFound(BoundConfigurationId),
/// A configuration with the requested hash exists, but its bound id differs from
/// the requested one. `{0}` is the configuration that was found, `{1}` the id that
/// was asked for.
#[error(
"Found configuration `{0}` by hash, but label mismatched from what is requested: `{1}`"
)]
ConfigFoundByHashLabelMismatch(ConfigurationData, BoundConfigurationId),
}
/// Emits a `ConfigurationCreated` instant event describing `cfg` and its constraints.
///
/// # Errors
///
/// Fails when `cfg.data()` fails, i.e. when `cfg` is not a bound configuration. A missing
/// event dispatcher is not an error: the event is silently skipped.
fn emit_configuration_instant_event(cfg: &ConfigurationData) -> buck2_error::Result<()> {
    let data = cfg.data()?;

    let mut constraints: Vec<buck2_data::Constraint> =
        Vec::with_capacity(data.constraints.len());
    for (setting, value) in &data.constraints {
        constraints.push(buck2_data::Constraint {
            setting: setting.to_string(),
            value: value.to_string(),
        });
    }

    // The dispatcher is not always initialised in tests (oss or buck2). That case is
    // tolerated here; e2e tests are relied on to assert that production code paths
    // still log this data.
    if let Ok(dispatcher) = EVENT_DISPATCH.get() {
        let created = buck2_data::ConfigurationCreated {
            cfg: Some(buck2_data::ConfigurationWithConstraints {
                full_name: cfg.full_name().to_owned(),
                constraint: constraints,
            }),
        };
        dispatcher.emit_instant_event_for_data(created.into());
    }

    Ok(())
}
/// A handle to an interned platform configuration.
///
/// The inner `HashedConfigurationPlatform` is interned because the same configuration
/// could be formed through different paths (as many transitions are associative).
///
/// `Display` is derived and forwards to the inner value, which renders its `full_name`
/// (for a bound platform this is `<label>#<output hash>`, as asserted in the tests of
/// this file).
#[derive(
Clone,
Debug,
Eq,
PartialEq,
Hash,
Dupe,
Ord,
PartialOrd,
Allocative,
derive_more::Display,
StrongHash,
Pagable
)]
pub struct ConfigurationData(Intern<HashedConfigurationPlatform>);
/// Borrowed interner lookup key: the textual output hash of a configuration.
///
/// NOTE(review): looking up by this key presumably requires it to hash the same way as
/// `HashedConfigurationPlatform` (which hashes only its `output_hash`). That depends on
/// how `ConfigurationHash` implements `Hash`, which is not visible here — confirm.
#[derive(Hash)]
struct ConfigurationHashRef<'a>(&'a str);

impl Equivalent<HashedConfigurationPlatform> for ConfigurationHashRef<'_> {
    /// Returns `true` when `key`'s output hash has the same text as this reference.
    fn equivalent(&self, key: &HashedConfigurationPlatform) -> bool {
        let candidate = key.output_hash.as_str();
        candidate == self.0
    }
}
// Declares `INTERNER`, the interner that `ConfigurationData` uses for
// `HashedConfigurationPlatform` values (see `from_data`, `iter_existing` and
// `lookup_bound`). `BuckHasher` is the hasher handed to the macro.
interner!(INTERNER, BuckHasher, HashedConfigurationPlatform);
impl ConfigurationData {
/// Produces a "bound" configuration for a platform. The label should be a unique identifier for the data.
pub fn from_platform(
label: String,
data: ConfigurationDataData,
is_marked_as_exec_platform: bool,
) -> buck2_error::Result<Self> {
let label = BoundConfigurationLabel::new(label)?;
let (cfg, disposition) = Self::from_data(HashedConfigurationPlatform::new(
ConfigurationPlatform::Bound(label, data, is_marked_as_exec_platform),
));
if let InternDisposition::Computed = disposition {
emit_configuration_instant_event(&cfg)?;
}
Ok(cfg)
}
pub fn unspecified() -> Self {
static CONFIG: Lazy<ConfigurationData> = Lazy::new(|| {
ConfigurationData::from_data(HashedConfigurationPlatform::new(
ConfigurationPlatform::Builtin(BuiltinPlatform::Unspecified),
))
.0
});
CONFIG.dupe()
}
pub fn unspecified_exec() -> Self {
static CONFIG: Lazy<ConfigurationData> = Lazy::new(|| {
ConfigurationData::from_data(HashedConfigurationPlatform::new(
ConfigurationPlatform::Builtin(BuiltinPlatform::UnspecifiedExec),
))
.0
});
CONFIG.dupe()
}
/// Produces the "unbound" configuration. This is used only when performing analysis of platform() targets and
/// their dependencies (which is done to form the initial "bound" configurations).
pub fn unbound() -> Self {
static CONFIG: Lazy<ConfigurationData> = Lazy::new(|| {
ConfigurationData::from_data(HashedConfigurationPlatform::new(
ConfigurationPlatform::Builtin(BuiltinPlatform::Unbound),
))
.0
});
CONFIG.dupe()
}
/// Produces the "unbound_exec" configuration. This is used only when getting the exec_deps for a configured node
/// before we've determined the execution configuration for the node.
pub fn unbound_exec() -> Self {
static CONFIG: Lazy<ConfigurationData> = Lazy::new(|| {
ConfigurationData::from_data(HashedConfigurationPlatform::new(
ConfigurationPlatform::Builtin(BuiltinPlatform::UnboundExec),
))
.0
});
CONFIG.dupe()
}
pub fn builtin(builtin: BuiltinPlatform) -> Self {
match builtin {
BuiltinPlatform::Unspecified => Self::unspecified(),
BuiltinPlatform::UnspecifiedExec => Self::unspecified_exec(),
BuiltinPlatform::Unbound => Self::unbound(),
BuiltinPlatform::UnboundExec => Self::unbound_exec(),
}
}
/// Produces an "invalid" configuration for testing.
pub fn testing_new() -> Self {
Self::from_data(HashedConfigurationPlatform::new(
ConfigurationPlatform::Bound(
BoundConfigurationLabel::new("<testing>".to_owned()).unwrap(),
ConfigurationDataData {
constraints: BTreeMap::new(),
},
false,
),
))
.0
}
fn from_data(data: HashedConfigurationPlatform) -> (Self, InternDisposition) {
let (val, disposition) = INTERNER.observed_intern(data);
(Self(val), disposition)
}
/// Iterates over the existing interned configurations. As these configurations
/// are never evicted, this may return configurations that aren't present in the
/// actual current state (for example, if you do a build and then delete everything
/// this will still iterate over previously existing configurations).
pub fn iter_existing() -> impl Iterator<Item = Self> {
INTERNER.iter().map(Self)
}
/// Looks up a known configuration from a `Configuration::full_name()` string. Generally
/// this is a debugging utility that most buck code shouldn't use, it's primarily useful
/// for resolving configuration strings provided on the command line.
///
/// This can only find configurations that have otherwise already been encountered by
/// the current daemon process.
pub fn lookup_bound(cfg: BoundConfigurationId) -> buck2_error::Result<Self> {
match INTERNER.get(ConfigurationHashRef(cfg.hash.as_str())) {
Some(found_cfg) => {
let found_cfg = ConfigurationData(found_cfg);
if found_cfg.bound_id().as_ref() != Some(&cfg) {
Err(
ConfigurationLookupError::ConfigFoundByHashLabelMismatch(found_cfg, cfg)
.into(),
)
} else {
Ok(found_cfg)
}
}
None => Err(ConfigurationLookupError::ConfigNotFound(cfg).into()),
}
}
pub fn get_constraint_value(
&self,
key: &ConstraintKey,
) -> buck2_error::Result<Option<&ConstraintValue>> {
Ok(self.data()?.constraints.get(key))
}
pub fn label(&self) -> buck2_error::Result<&str> {
match &self.0.configuration_platform {
ConfigurationPlatform::Bound(label, _, _) => Ok(label.as_str()),
_ => Err(ConfigurationError::NotBound(self.to_string()).into()),
}
}
pub fn data(&self) -> buck2_error::Result<&ConfigurationDataData> {
match &self.0.configuration_platform {
ConfigurationPlatform::Builtin(BuiltinPlatform::UnspecifiedExec) => {
Err(ConfigurationError::UnspecifiedExec.into())
}
ConfigurationPlatform::Builtin(builtin) => {
Err(ConfigurationError::Builtin(*builtin).into())
}
ConfigurationPlatform::Bound(_, data, _) => Ok(data),
}
}
pub fn is_unbound(&self) -> bool {
matches!(
&self.0.configuration_platform,
ConfigurationPlatform::Builtin(BuiltinPlatform::Unbound)
)
}
pub fn bound(&self) -> Option<&BoundConfigurationLabel> {
match &self.0.configuration_platform {
ConfigurationPlatform::Bound(label, _, _) => Some(label),
_ => None,
}
}
pub fn bound_id(&self) -> Option<BoundConfigurationId> {
Some(BoundConfigurationId {
label: self.bound()?.clone(),
hash: self.output_hash().clone(),
})
}
pub fn is_bound(&self) -> bool {
matches!(
&self.0.configuration_platform,
ConfigurationPlatform::Bound(..)
)
}
pub fn is_marked_as_exec_platform(&self) -> bool {
matches!(
&self.0.configuration_platform,
ConfigurationPlatform::Bound(_, _, true)
)
}
pub fn output_hash(&self) -> &ConfigurationHash {
&self.0.output_hash
}
/// Name without hash.
pub fn short_name(&self) -> &str {
self.0.configuration_platform.label()
}
pub fn full_name(&self) -> &str {
&self.0.full_name
}
}
impl Serialize for ConfigurationData {
fn serialize<S>(&self, s: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
s.collect_str(self)
}
}
impl ToProtoMessage for ConfigurationData {
    type Message = buck2_data::Configuration;

    /// Builds the proto message, which carries only the configuration's full name.
    fn as_proto(&self) -> Self::Message {
        let full_name = self.full_name().to_owned();
        buck2_data::Configuration { full_name }
    }
}
/// The platform a configuration was created from: either a user-defined ("bound")
/// platform or one of the builtin placeholder platforms.
///
/// `Ord`/`PartialOrd` are derived, so the declaration order of the variants and of the
/// fields inside `Bound` determines the derived ordering.
#[derive(
Debug, Hash, Eq, PartialEq, Ord, PartialOrd, Allocative, StrongHash, Pagable
)]
enum ConfigurationPlatform {
/// This represents the normal case where a platform has been defined by a `platform()` (or similar) target.
/// The `bool` indicates whether the user provided a modifier constraint to mark this as an execution platform.
Bound(BoundConfigurationLabel, ConfigurationDataData, bool),
/// One of the builtin platforms; these carry no constraint data
/// (`ConfigurationData::data` returns an error for them).
Builtin(BuiltinPlatform),
}
impl ConfigurationPlatform {
fn label(&self) -> &str {
match self {
ConfigurationPlatform::Bound(label, _, _) => label.as_str(),
ConfigurationPlatform::Builtin(builtin) => builtin.label(),
}
}
}
/// A set of values used in configuration-related contexts.
///
/// `Hash` is implemented by hand further down in this file rather than derived.
#[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Allocative, StrongHash, Pagable)]
pub struct ConfigurationDataData {
// Contains the full specification of the platform configuration: one value per
// constraint key, kept in key order by the `BTreeMap`.
pub constraints: BTreeMap<ConstraintKey, ConstraintValue>,
}
/// `Hash` is written by hand rather than derived because Buck 2 is built with two different
/// versions of rustc at the moment, and their hashing disagrees
/// <https://github.com/rust-lang/rust/pull/89443>. In any case, what goes into this hash
/// should be under our control.
// `Eq`/`PartialEq` are derived on the struct while `Hash` is manual, hence the lint opt-out.
#[allow(clippy::derived_hash_with_manual_eq)]
impl Hash for ConfigurationDataData {
    /// Feeds every `(key, value)` entry, in map order, into `state`.
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.constraints
            .iter()
            .for_each(|entry| entry.hash(state));
    }
}
impl ConfigurationDataData {
    /// Creates configuration data with no constraints.
    pub fn empty() -> Self {
        Self {
            constraints: BTreeMap::new(),
        }
    }

    /// Wraps an existing constraint map.
    pub fn new(constraints: BTreeMap<ConstraintKey, ConstraintValue>) -> Self {
        Self { constraints }
    }

    /// Returns the value recorded for `key`, or `None` when the key is absent.
    pub fn get_constraint_value(&self, key: &ConstraintKey) -> Option<&ConstraintValue> {
        let constraints = &self.constraints;
        constraints.get(key)
    }

    /// Merges this into `other`, with values in `other` taking precedence: entries of
    /// `self` are copied only for keys that `other` does not already contain.
    pub fn merge(&self, mut other: ConfigurationDataData) -> Self {
        for (key, value) in self.constraints.iter() {
            match other.constraints.entry(key.dupe()) {
                std::collections::btree_map::Entry::Vacant(slot) => {
                    slot.insert(value.dupe());
                }
                // `other` already has a value for this key; it wins.
                std::collections::btree_map::Entry::Occupied(_) => {}
            }
        }
        other
    }
}
/// A `ConfigurationPlatform` together with the name and hash derived from it in
/// `HashedConfigurationPlatform::new`. Displays as `full_name`.
#[derive(Debug, Allocative, derive_more::Display, Pagable)]
#[display("{}", full_name)]
pub(crate) struct HashedConfigurationPlatform {
configuration_platform: ConfigurationPlatform,
// The remaining fields are computed from `configuration_platform`.
/// The "full name" includes both the platform and a hash of the configuration data.
/// For a bound platform it is `<label>#<output_hash>`; for a builtin platform it is
/// just the builtin label.
full_name: String,
/// A hash of the configuration data that is used for determining output paths.
output_hash: ConfigurationHash,
}
/// Hashing, equality, and ordering all look only at `output_hash`, which keeps the three
/// consistent with one another. Whether `is_marked_as_exec_platform` is folded into
/// `output_hash` depends on `HASH_CFG_WITH_EXEC_PLATFORM`.
impl std::hash::Hash for HashedConfigurationPlatform {
    fn hash<H: Hasher>(&self, state: &mut H) {
        let Self { output_hash, .. } = self;
        output_hash.hash(state)
    }
}
impl PartialEq for HashedConfigurationPlatform {
    /// Two values are equal exactly when their `output_hash` fields are equal; the
    /// platform and the full name are not compared.
    fn eq(&self, other: &Self) -> bool {
        let Self { output_hash: lhs, .. } = self;
        let Self { output_hash: rhs, .. } = other;
        lhs == rhs
    }
}

impl Eq for HashedConfigurationPlatform {}
impl PartialOrd for HashedConfigurationPlatform {
    /// Always `Some`: defers to the total order defined by `Ord`.
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(Ord::cmp(self, other))
    }
}

impl Ord for HashedConfigurationPlatform {
    /// Orders values by `output_hash` only.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        let Self { output_hash: rhs, .. } = other;
        self.output_hash.cmp(rhs)
    }
}
impl StrongHash for HashedConfigurationPlatform {
    /// Feeds the precomputed `output_hash` into `state`.
    fn strong_hash<H: Hasher>(&self, state: &mut H) {
        // `output_hash` is already a strong hash (it is produced by `Blake3StrongHasher`
        // in `HashedConfigurationPlatform::new`), so the regular `Hash` impl is used here.
        let Self { output_hash, .. } = self;
        output_hash.hash(state)
    }
}
impl HashedConfigurationPlatform {
    /// Computes the output hash and full name for `configuration_platform` and bundles
    /// them with it.
    ///
    /// When `HASH_CFG_WITH_EXEC_PLATFORM` is set to `true`, the whole platform (including
    /// the exec-platform flag) is strong-hashed. Otherwise (unset or `false`) the flag is
    /// left out of the hash.
    fn new(configuration_platform: ConfigurationPlatform) -> Self {
        let include_exec_flag = HASH_CFG_WITH_EXEC_PLATFORM
            .get()
            .copied()
            .unwrap_or(false);

        let mut hasher = Blake3StrongHasher::new();
        if include_exec_flag {
            configuration_platform.strong_hash(&mut hasher);
        } else {
            // Exclude `is_marked_as_exec_platform` so that it doesn't affect output paths.
            match &configuration_platform {
                ConfigurationPlatform::Builtin(builtin) => {
                    StrongHash::strong_hash("Builtin", &mut hasher);
                    builtin.strong_hash(&mut hasher);
                }
                ConfigurationPlatform::Bound(label, data, _is_marked_as_exec_platform) => {
                    StrongHash::strong_hash("Bound", &mut hasher);
                    label.strong_hash(&mut hasher);
                    data.strong_hash(&mut hasher);
                }
            }
        }
        let output_hash = ConfigurationHash::new(hasher.finish());

        // Bound platforms get `<label>#<hash>`; builtin platforms keep their plain label.
        let full_name = match &configuration_platform {
            ConfigurationPlatform::Builtin(builtin) => builtin.label().to_owned(),
            ConfigurationPlatform::Bound(label, _data, _is_marked_as_exec_platform) => {
                format!("{label:#}#{output_hash}")
            }
        };

        Self {
            configuration_platform,
            full_name,
            output_hash,
        }
    }
}
#[cfg(test)]
mod tests {
    use std::collections::BTreeMap;

    use crate::configuration::bound_id::BoundConfigurationId;
    use crate::configuration::constraints::ConstraintKey;
    use crate::configuration::constraints::ConstraintValue;
    use crate::configuration::data::ConfigurationData;
    use crate::configuration::data::ConfigurationDataData;

    /// Builds the non-exec `cfg_for//:testing_exec` configuration with two constraints
    /// that the hash-stability and lookup tests share.
    fn testing_exec_configuration() -> ConfigurationData {
        let constraints = BTreeMap::from_iter([
            (
                ConstraintKey::testing_new("foo//bar:c"),
                ConstraintValue::testing_new("foo//bar:v", None),
            ),
            (
                ConstraintKey::testing_new("foo//qux:c"),
                ConstraintValue::testing_new("foo//qux:vx", None),
            ),
        ]);
        ConfigurationData::from_platform(
            "cfg_for//:testing_exec".to_owned(),
            ConfigurationDataData { constraints },
            false,
        )
        .unwrap()
    }

    /// We don't want the output hash to change by accident. This test is here to assert
    /// that it doesn't. If we have a legit reason to update the config hash, we can update
    /// the hash here, but this will ensure we a) know and b) don't do it by accident.
    #[test]
    fn test_stable_output_hash() -> buck2_error::Result<()> {
        let configuration = testing_exec_configuration();

        assert_eq!(configuration.output_hash().as_str(), "6770d7f2ebfc0845");
        assert_eq!(
            configuration.to_string(),
            "cfg_for//:testing_exec#6770d7f2ebfc0845"
        );
        Ok(())
    }

    /// A configuration that has been interned can be found again from its full name.
    #[test]
    fn test_lookup_from_string() {
        let configuration = testing_exec_configuration();

        let expected_cfg_str = "cfg_for//:testing_exec#6770d7f2ebfc0845";
        assert_eq!(expected_cfg_str, configuration.to_string());

        let requested_id = BoundConfigurationId::parse(expected_cfg_str).unwrap();
        let looked_up = ConfigurationData::lookup_bound(requested_id).unwrap();
        assert_eq!(configuration, looked_up);
    }

    /// With `HASH_CFG_WITH_EXEC_PLATFORM` unset (the default), configs that differ only
    /// in `is_marked_as_exec_platform` must be equal with matching hashes.
    #[test]
    fn test_exec_vs_non_exec_hash_eq_consistency() {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::Hash;
        use std::hash::Hasher;

        fn compute_hash(cfg: &ConfigurationData) -> u64 {
            let mut hasher = DefaultHasher::new();
            cfg.hash(&mut hasher);
            hasher.finish()
        }

        let constraints = BTreeMap::from_iter([(
            ConstraintKey::testing_new("foo//bar:c"),
            ConstraintValue::testing_new("foo//bar:v", None),
        )]);

        let non_exec_data = ConfigurationDataData {
            constraints: constraints.clone(),
        };
        let cfg_non_exec =
            ConfigurationData::from_platform("my//platform:cfg".to_owned(), non_exec_data, false)
                .unwrap();

        let exec_data = ConfigurationDataData { constraints };
        let cfg_exec =
            ConfigurationData::from_platform("my//platform:cfg".to_owned(), exec_data, true)
                .unwrap();

        assert_eq!(
            cfg_non_exec, cfg_exec,
            "Configs differing only in is_exec_platform must be equal"
        );
        assert_eq!(
            compute_hash(&cfg_non_exec),
            compute_hash(&cfg_exec),
            "Equal configs must have equal hashes"
        );
    }
}