Skip to content

Commit 819eded

Browse files
Add tfport-data-link timeseries schema and SMF config additions.
Related to https://github.com/oxidecomputer/dendrite/pull/1033. This is a new timeseries (from sled-data-link), as we've added switch information to the metadata.
1 parent ab1f377 commit 819eded

File tree

7 files changed

+270
-30
lines changed

7 files changed

+270
-30
lines changed

Diff for: common/src/address.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ pub const MGS_PORT: u16 = 12225;
4848
pub const WICKETD_PORT: u16 = 12226;
4949
pub const BOOTSTRAP_ARTIFACT_PORT: u16 = 12227;
5050
pub const CRUCIBLE_PANTRY_PORT: u16 = 17000;
51-
51+
pub const TFPORTD_PORT: u16 = 12231;
5252
pub const NEXUS_INTERNAL_PORT: u16 = 12221;
5353

5454
/// The port on which Nexus exposes its external API on the underlay network.

Diff for: oximeter/oximeter/schema/tfport-data-link.toml

+132
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
format_version = 1
2+
3+
[target]
4+
name = "tfport_data_link"
5+
description = "A network data link on a tfport interface"
6+
authz_scope = "fleet"
7+
8+
versions = [
9+
{ version = 1, fields = [ "kind", "link_name", "rack_id", "sled_id", "sled_model", "sled_revision", "sled_serial", "switch_id", "switch_fab", "switch_lot", "switch_wafer", "switch_wafer_loc_x", "switch_wafer_loc_y", "switch_model", "switch_revision", "switch_serial", "switch_slot" ] },
10+
]
11+
12+
[fields.kind]
13+
type = "string"
14+
description = "The kind or class of the data link"
15+
16+
[fields.link_name]
17+
type = "string"
18+
description = "Name of the data link"
19+
20+
[fields.rack_id]
21+
type = "uuid"
22+
description = "ID for the link's rack"
23+
24+
[fields.sled_id]
25+
type = "uuid"
26+
description = "ID for the link's sled"
27+
28+
[fields.sled_model]
29+
type = "string"
30+
description = "Model number of the link's sled"
31+
32+
[fields.sled_revision]
33+
type = "u32"
34+
description = "Revision number of the sled"
35+
36+
[fields.sled_serial]
37+
type = "string"
38+
description = "Serial number of the sled"
39+
40+
[fields.switch_id]
41+
type = "uuid"
42+
description = "ID of the switch the link is on"
43+
44+
[fields.switch_fab]
45+
type = "string"
46+
description = "Fabrication plant identifier of the switch the link is on"
47+
48+
[fields.switch_lot]
49+
type = "string"
50+
description = "Lot number of the switch the link is on"
51+
52+
[fields.switch_wafer]
53+
type = "u8"
54+
description = "Wafer number of the switch the link is on"
55+
56+
[fields.switch_wafer_loc_x]
57+
type = "i8"
58+
description = "X-coordinate wafer location of the switch the link is on"
59+
60+
[fields.switch_wafer_loc_y]
61+
type = "i8"
62+
description = "Y-coordinate wafer location of the switch the link is on"
63+
64+
[fields.switch_model]
65+
type = "string"
66+
description = "The model number of the switch the link is on"
67+
68+
[fields.switch_revision]
69+
type = "u32"
70+
description = "Revision number of the switch the link is on"
71+
72+
[fields.switch_serial]
73+
type = "string"
74+
description = "Serial number of the switch the link is on"
75+
76+
[fields.switch_slot]
77+
type = "u16"
78+
description = "Slot number of the switch the link is on"
79+
80+
[[metrics]]
81+
name = "bytes_sent"
82+
description = "Number of bytes sent on the link"
83+
units = "bytes"
84+
datum_type = "cumulative_u64"
85+
versions = [
86+
{ added_in = 1, fields = [] }
87+
]
88+
89+
[[metrics]]
90+
name = "bytes_received"
91+
description = "Number of bytes received on the link"
92+
units = "bytes"
93+
datum_type = "cumulative_u64"
94+
versions = [
95+
{ added_in = 1, fields = [] }
96+
]
97+
98+
[[metrics]]
99+
name = "packets_sent"
100+
description = "Number of packets sent on the link"
101+
units = "count"
102+
datum_type = "cumulative_u64"
103+
versions = [
104+
{ added_in = 1, fields = [] }
105+
]
106+
107+
[[metrics]]
108+
name = "packets_received"
109+
description = "Number of packets received on the link"
110+
units = "count"
111+
datum_type = "cumulative_u64"
112+
versions = [
113+
{ added_in = 1, fields = [] }
114+
]
115+
116+
[[metrics]]
117+
name = "errors_sent"
118+
description = "Number of errors encountered when sending on the link"
119+
units = "count"
120+
datum_type = "cumulative_u64"
121+
versions = [
122+
{ added_in = 1, fields = [] }
123+
]
124+
125+
[[metrics]]
126+
name = "errors_received"
127+
description = "Number of errors encountered when receiving on the link"
128+
units = "count"
129+
datum_type = "cumulative_u64"
130+
versions = [
131+
{ added_in = 1, fields = [] }
132+
]

Diff for: package-manifest.toml

+6-6
Original file line numberDiff line numberDiff line change
@@ -717,8 +717,8 @@ only_for_targets.image = "standard"
717717
# the other `source.*` keys.
718718
source.type = "prebuilt"
719719
source.repo = "dendrite"
720-
source.commit = "b425a6c783b3081613ffa00407f271cae06568c4"
721-
source.sha256 = "5acb29662fa052fc7805716cf761c39bf0cef168c1e33d9fdb259104503203e5"
720+
source.commit = "a502a30b43b32ad4c884ab4e1e6d87bc01387166"
721+
source.sha256 = "a79cc46ce4a994ca245c8d52f157521c4c32399ae8b6e6dbb4b54126c7d6dc6b"
722722
output.type = "zone"
723723
output.intermediate_only = true
724724

@@ -744,8 +744,8 @@ only_for_targets.image = "standard"
744744
# the other `source.*` keys.
745745
source.type = "prebuilt"
746746
source.repo = "dendrite"
747-
source.commit = "b425a6c783b3081613ffa00407f271cae06568c4"
748-
source.sha256 = "2c1f901ac96028264fd904fb197d1aaef522432ad16097d3606321101813dfde"
747+
source.commit = "a502a30b43b32ad4c884ab4e1e6d87bc01387166"
748+
source.sha256 = "05ae723439b432f66e76427805bb2d0fa28603dc28c22624f2a6f872fa6b3627"
749749
output.type = "zone"
750750
output.intermediate_only = true
751751

@@ -764,8 +764,8 @@ only_for_targets.image = "standard"
764764
# the other `source.*` keys.
765765
source.type = "prebuilt"
766766
source.repo = "dendrite"
767-
source.commit = "b425a6c783b3081613ffa00407f271cae06568c4"
768-
source.sha256 = "94d1231d5b4ba5ea3cd7ce90ea0c46d43c87bfdd3e694cc99a78f8578ba1b9f7"
767+
source.commit = "a502a30b43b32ad4c884ab4e1e6d87bc01387166"
768+
source.sha256 = "982a28193410e1641e0f8ac015c294610dc1bfd162bae8e68b9ba8ebf3800e75"
769769
output.type = "zone"
770770
output.intermediate_only = true
771771

Diff for: sled-agent/src/metrics.rs

+8-8
Original file line numberDiff line numberDiff line change
@@ -223,15 +223,15 @@ async fn remove_datalink(
223223
Ok(_) => {
224224
debug!(
225225
log,
226-
"Removed VNIC from tracked links";
226+
"removed VNIC from tracked links";
227227
"link_name" => name,
228228
);
229229
}
230230
Err(err) => {
231231
error!(
232232
log,
233-
"Failed to remove VNIC from kstat sampler, \
234-
metrics may still be produced for it";
233+
"failed to remove VNIC from kstat sampler, \
234+
metrics may still be produced for it";
235235
"link_name" => name,
236236
"error" => ?err,
237237
);
@@ -263,7 +263,7 @@ async fn add_datalink(
263263
Ok(id) => {
264264
debug!(
265265
log,
266-
"Added new link to kstat sampler";
266+
"added new link to kstat sampler";
267267
"link_name" => entry.key(),
268268
"link_kind" => %link.kind(),
269269
"zone_name" => %link.zone_name(),
@@ -273,8 +273,8 @@ async fn add_datalink(
273273
Err(err) => {
274274
error!(
275275
log,
276-
"Failed to add VNIC to kstat sampler, \
277-
no metrics will be collected for it";
276+
"failed to add VNIC to kstat sampler, \
277+
no metrics will be collected for it";
278278
"link_name" => entry.key(),
279279
"link_kind" => %link.kind(),
280280
"zone_name" => %link.zone_name(),
@@ -310,14 +310,14 @@ async fn sync_sled_datalinks(
310310
Ok(_) => {
311311
debug!(
312312
log,
313-
"Updated link already tracked by kstat sampler";
313+
"updated link already tracked by kstat sampler";
314314
"link_name" => link_name,
315315
);
316316
}
317317
Err(err) => {
318318
error!(
319319
log,
320-
"Failed to update link already tracked by kstat sampler";
320+
"failed to update link already tracked by kstat sampler";
321321
"link_name" => link_name,
322322
"error" => ?err,
323323
);

Diff for: sled-agent/src/services.rs

+118-10
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ use omicron_common::address::LLDP_PORT;
7777
use omicron_common::address::MGS_PORT;
7878
use omicron_common::address::RACK_PREFIX;
7979
use omicron_common::address::SLED_PREFIX;
80+
use omicron_common::address::TFPORTD_PORT;
8081
use omicron_common::address::WICKETD_NEXUS_PROXY_PORT;
8182
use omicron_common::address::WICKETD_PORT;
8283
use omicron_common::address::{
@@ -2852,17 +2853,67 @@ impl ServiceManager {
28522853
SwitchService::Tfport { pkt_source, asic } => {
28532854
info!(self.inner.log, "Setting up tfport service");
28542855
let mut tfport_config =
2855-
PropertyGroupBuilder::new("config")
2856+
PropertyGroupBuilder::new("config");
2857+
2858+
tfport_config = tfport_config
2859+
.add_property(
2860+
"host",
2861+
"astring",
2862+
&format!("[{}]", Ipv6Addr::LOCALHOST),
2863+
)
2864+
.add_property(
2865+
"port",
2866+
"astring",
2867+
&format!("{}", DENDRITE_PORT),
2868+
);
2869+
if let Some(i) = info {
2870+
tfport_config = tfport_config
2871+
.add_property(
2872+
"rack_id",
2873+
"astring",
2874+
&i.rack_id.to_string(),
2875+
)
2876+
.add_property(
2877+
"sled_id",
2878+
"astring",
2879+
&i.config
2880+
.sled_identifiers
2881+
.sled_id
2882+
.to_string(),
2883+
)
2884+
.add_property(
2885+
"sled_model",
2886+
"astring",
2887+
&i.config
2888+
.sled_identifiers
2889+
.model
2890+
.to_string(),
2891+
)
28562892
.add_property(
2857-
"host",
2893+
"sled_revision",
28582894
"astring",
2859-
&format!("[{}]", Ipv6Addr::LOCALHOST),
2895+
&i.config
2896+
.sled_identifiers
2897+
.revision
2898+
.to_string(),
28602899
)
28612900
.add_property(
2862-
"port",
2901+
"sled_serial",
28632902
"astring",
2864-
&format!("{}", DENDRITE_PORT),
2903+
&i.config
2904+
.sled_identifiers
2905+
.serial
2906+
.to_string(),
28652907
);
2908+
}
2909+
2910+
for address in addresses {
2911+
tfport_config = tfport_config.add_property(
2912+
"address",
2913+
"astring",
2914+
&format!("[{}]:{}", address, TFPORTD_PORT),
2915+
);
2916+
}
28662917

28672918
let is_gimlet = is_gimlet().map_err(|e| {
28682919
Error::Underlay(
@@ -2903,6 +2954,7 @@ impl ServiceManager {
29032954

29042955
if is_gimlet
29052956
|| asic == &DendriteAsic::SoftNpuPropolisDevice
2957+
|| asic == &DendriteAsic::TofinoAsic
29062958
{
29072959
tfport_config = tfport_config.add_property(
29082960
"pkt_source",
@@ -4426,7 +4478,7 @@ impl ServiceManager {
44264478
} else {
44274479
info!(
44284480
self.inner.log,
4429-
"no rack_id/sled_id available yet"
4481+
"no sled info available yet"
44304482
);
44314483
}
44324484
smfh.delpropvalue_default_instance(
@@ -4500,10 +4552,66 @@ impl ServiceManager {
45004552
smfh.refresh()?;
45014553
info!(self.inner.log, "refreshed lldpd service with new configuration")
45024554
}
4503-
SwitchService::Tfport { .. } => {
4504-
// Since tfport and dpd communicate using localhost,
4505-
// the tfport service shouldn't need to be
4506-
// restarted.
4555+
SwitchService::Tfport { pkt_source, asic } => {
4556+
info!(self.inner.log, "configuring tfport service");
4557+
if let Some(info) = self.inner.sled_info.get() {
4558+
smfh.setprop_default_instance(
4559+
"config/rack_id",
4560+
info.rack_id,
4561+
)?;
4562+
smfh.setprop_default_instance(
4563+
"config/sled_id",
4564+
info.config.sled_identifiers.sled_id,
4565+
)?;
4566+
smfh.setprop_default_instance(
4567+
"config/sled_model",
4568+
info.config
4569+
.sled_identifiers
4570+
.model
4571+
.to_string(),
4572+
)?;
4573+
smfh.setprop_default_instance(
4574+
"config/sled_revision",
4575+
info.config.sled_identifiers.revision,
4576+
)?;
4577+
smfh.setprop_default_instance(
4578+
"config/sled_serial",
4579+
info.config
4580+
.sled_identifiers
4581+
.serial
4582+
.to_string(),
4583+
)?;
4584+
} else {
4585+
info!(
4586+
self.inner.log,
4587+
"no sled info available yet"
4588+
);
4589+
}
4590+
smfh.delpropvalue_default_instance(
4591+
"config/address",
4592+
"*",
4593+
)?;
4594+
for address in &request.addresses {
4595+
smfh.addpropvalue_type_default_instance(
4596+
"config/address",
4597+
&format!("[{}]:{}", address, TFPORTD_PORT),
4598+
"astring",
4599+
)?;
4600+
}
4601+
4602+
match asic {
4603+
DendriteAsic::SoftNpuPropolisDevice
4604+
| DendriteAsic::TofinoAsic => {
4605+
smfh.setprop_default_instance(
4606+
"config/pkt_source",
4607+
pkt_source,
4608+
)?;
4609+
}
4610+
_ => {}
4611+
}
4612+
4613+
smfh.refresh()?;
4614+
info!(self.inner.log, "refreshed tfport service with new configuration")
45074615
}
45084616
SwitchService::Pumpkind { .. } => {
45094617
// Unless we want to plumb through the "only log

0 commit comments

Comments
 (0)