Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
import ai.vespa.metricsproxy.metric.ExternalMetrics;
import ai.vespa.metricsproxy.metric.dimensions.ApplicationDimensions;
import ai.vespa.metricsproxy.metric.dimensions.ApplicationDimensionsConfig;
import ai.vespa.metricsproxy.metric.dimensions.MetricDimensionMapping;
import ai.vespa.metricsproxy.metric.dimensions.MetricDimensionMappingConfig;
import ai.vespa.metricsproxy.metric.dimensions.PublicDimensions;
import ai.vespa.metricsproxy.rpc.RpcServer;
import ai.vespa.metricsproxy.service.ConfigSentinelClient;
Expand Down Expand Up @@ -65,6 +67,7 @@
*/
public class MetricsProxyContainerCluster extends ContainerCluster<MetricsProxyContainer> implements
ApplicationDimensionsConfig.Producer,
MetricDimensionMappingConfig.Producer,
ConsumersConfig.Producer,
MonitoringConfig.Producer,
MetricsNodesConfig.Producer
Expand All @@ -73,6 +76,9 @@ public class MetricsProxyContainerCluster extends ContainerCluster<MetricsProxyC

public static final String NEW_DEFAULT_CONSUMER_ID = "new-default";

// The service/application name of the locally generated 'alive' packet; see HostLifeGatherer.
private static final String HOST_LIFE_SERVICE = "host_life";

private static final String METRICS_PROXY_NAME = "metrics-proxy";

static final Path METRICS_PROXY_BUNDLE_FILE = PlatformBundles.absoluteBundlePath(METRICS_PROXY_NAME);
Expand Down Expand Up @@ -120,6 +126,7 @@ private void addClusterComponents() {
addMetricsProxyComponent(ApplicationDimensions.class);
addMetricsProxyComponent(ConfigSentinelClient.class);
addMetricsProxyComponent(ExternalMetrics.class);
addMetricsProxyComponent(MetricDimensionMapping.class);
addMetricsProxyComponent(MetricsConsumers.class);
addMetricsProxyComponent(MetricsManager.class);
addMetricsProxyComponent(RpcServer.class);
Expand Down Expand Up @@ -191,6 +198,18 @@ public void getConfig(ApplicationDimensionsConfig.Builder builder) {
}
}

@Override
public void getConfig(MetricDimensionMappingConfig.Builder builder) {
if (isHostedVespa()) {
builder.defaultDimension(PublicDimensions.HOSTNAME);
builder.defaultDimension(PublicDimensions.PARENT_HOSTNAME);
builder.service(HOST_LIFE_SERVICE, s -> s
.dimension(PublicDimensions.HOSTNAME)
.dimension(PublicDimensions.PARENT_HOSTNAME)
.dimension(PublicDimensions.OS_VERSION));

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We want to add the OS version dimension to the host_life metric for everyone?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we want it to be a generic feature for everyone. Also it will be useful for us on our dashboards.

}
}

protected boolean messageBusEnabled() { return false; }

private MetricSet getAdditionalDefaultMetrics() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import ai.vespa.metricsproxy.http.prometheus.PrometheusHandler;
import ai.vespa.metricsproxy.http.yamas.YamasHandler;
import ai.vespa.metricsproxy.metric.dimensions.ApplicationDimensionsConfig;
import ai.vespa.metricsproxy.metric.dimensions.MetricDimensionMappingConfig;
import ai.vespa.metricsproxy.metric.dimensions.PublicDimensions;
import com.yahoo.component.ComponentSpecification;
import com.yahoo.config.model.test.MockApplicationPackage;
Expand All @@ -25,6 +26,7 @@

import java.nio.file.Path;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import java.util.stream.Stream;

Expand All @@ -37,6 +39,7 @@
import static com.yahoo.vespa.model.admin.metricsproxy.MetricsProxyModelTester.TestMode.hosted;
import static com.yahoo.vespa.model.admin.metricsproxy.MetricsProxyModelTester.TestMode.self_hosted;
import static com.yahoo.vespa.model.admin.metricsproxy.MetricsProxyModelTester.getApplicationDimensionsConfig;
import static com.yahoo.vespa.model.admin.metricsproxy.MetricsProxyModelTester.getMetricDimensionMappingConfig;
import static com.yahoo.vespa.model.admin.metricsproxy.MetricsProxyModelTester.getMetricsNodesConfig;
import static com.yahoo.vespa.model.admin.metricsproxy.MetricsProxyModelTester.getModel;
import static com.yahoo.vespa.model.admin.metricsproxy.MetricsProxyModelTester.servicesWithAdminOnly;
Expand Down Expand Up @@ -108,6 +111,26 @@ void hosted_application_propagates_application_dimensions() {
assertEquals(MY_APPLICATION + "." + MY_INSTANCE, config.dimensions(AppDimensionNames.LEGACY_APPLICATION));
}

@Test
void hosted_application_propagates_metric_dimension_mapping() {
    // In hosted mode the mapping has a default dimension list plus a single 'host_life' override.
    MetricDimensionMappingConfig mapping = getMetricDimensionMappingConfig(getModel(servicesWithAdminOnly(), hosted));

    List<String> expectedDefaults = List.of(PublicDimensions.HOSTNAME, PublicDimensions.PARENT_HOSTNAME);
    assertEquals(expectedDefaults, mapping.defaultDimension());

    // Only 'host_life' is listed explicitly; it additionally carries the OS version.
    assertEquals(Set.of("host_life"), mapping.service().keySet());

    List<String> expectedHostLife = List.of(PublicDimensions.HOSTNAME,
                                            PublicDimensions.PARENT_HOSTNAME,
                                            PublicDimensions.OS_VERSION);
    assertEquals(expectedHostLife, mapping.service("host_life").dimension());
}

@Test
void self_hosted_application_has_empty_metric_dimension_mapping() {
    // In self-hosted mode neither default dimensions nor per-service overrides are configured.
    MetricDimensionMappingConfig mapping = getMetricDimensionMappingConfig(getModel(servicesWithAdminOnly(), self_hosted));

    assertTrue(mapping.defaultDimension().isEmpty());
    assertTrue(mapping.service().isEmpty());
}

@Test
void all_nodes_are_included_in_metrics_nodes_config() {
VespaModel hostedModel = getModel(servicesWithTwoNodes(), hosted);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import ai.vespa.metricsproxy.core.ConsumersConfig;
import ai.vespa.metricsproxy.http.application.MetricsNodesConfig;
import ai.vespa.metricsproxy.metric.dimensions.ApplicationDimensionsConfig;
import ai.vespa.metricsproxy.metric.dimensions.MetricDimensionMappingConfig;
import ai.vespa.metricsproxy.metric.dimensions.NodeDimensionsConfig;
import ai.vespa.metricsproxy.rpc.RpcConnectorConfig;
import ai.vespa.metricsproxy.service.VespaServicesConfig;
Expand Down Expand Up @@ -108,6 +109,10 @@ static ApplicationDimensionsConfig getApplicationDimensionsConfig(VespaModel mod
return model.getConfig(ApplicationDimensionsConfig.class, CLUSTER_CONFIG_ID);
}

/** Resolves the {@link MetricDimensionMappingConfig} of the given model, using the cluster config id. */
static MetricDimensionMappingConfig getMetricDimensionMappingConfig(VespaModel model) {
    Class<MetricDimensionMappingConfig> configClass = MetricDimensionMappingConfig.class;
    return model.getConfig(configClass, CLUSTER_CONFIG_ID);
}

static NodeDimensionsConfig getNodeDimensionsConfig(VespaModel model, String configId) {
return model.getConfig(NodeDimensionsConfig.class, configId);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import ai.vespa.metricsproxy.metric.model.ConsumerId;
import ai.vespa.metricsproxy.metric.model.DimensionId;
import ai.vespa.metricsproxy.metric.model.MetricsPacket;
import ai.vespa.metricsproxy.metric.model.ServiceId;
import ai.vespa.metricsproxy.service.VespaService;
import ai.vespa.metricsproxy.service.VespaServices;

Expand All @@ -17,6 +18,7 @@
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Logger;
import java.util.stream.Collectors;

Expand All @@ -41,6 +43,7 @@ public class MetricsManager {
private final NodeDimensions nodeDimensions;

private volatile Map<DimensionId, String> extraDimensions = new HashMap<>();
private volatile Map<DimensionId, String> extraHostDimensions = new HashMap<>();
private volatile Instant externalMetricsUpdateTime = Instant.now();
private static final Duration EXTERNAL_METRICS_TTL = Duration.ofMinutes(10);

Expand Down Expand Up @@ -173,6 +176,7 @@ public List<MetricsPacket> getHealthMetrics(List<VespaService> services) {
public void setExtraMetrics(List<MetricsPacket.Builder> packets) {
// Replaces the externally supplied metrics and the dimensions harvested from them.
// The update time is recorded first; purgeStaleMetrics() compares against it.
externalMetricsUpdateTime = Instant.now();
extraDimensions = extractConfigserverDimensions(packets);
// Host dimensions are harvested before the packets are handed to externalMetrics.setExtraMetrics.
// NOTE(review): that call appears to modify the packets (it strips disallowed host dimensions),
// so this ordering looks intentional - confirm before reordering.
extraHostDimensions = externalMetrics.extractHostDimensions(packets);
externalMetrics.setExtraMetrics(packets);
}

Expand All @@ -181,6 +185,17 @@ public Map<DimensionId, String> getExtraDimensions() {
return this.extraDimensions;
}

/**
 * Returns the harvested host-level dimensions that the metric-to-dimension mapping allows for the
 * given service. Stale external metrics are purged before the lookup.
 *
 * @param serviceId the service whose metrics the dimensions are intended for
 * @return a new map holding only the allowed host dimensions and their values
 */
public Map<DimensionId, String> getExtraHostDimensions(ServiceId serviceId) {
    purgeStaleMetrics();
    Set<DimensionId> permitted = externalMetrics.allowedHostDimensions(serviceId);
    Map<DimensionId, String> filtered = new LinkedHashMap<>();
    for (Map.Entry<DimensionId, String> dimension : extraHostDimensions.entrySet()) {
        if (permitted.contains(dimension.getKey())) {
            filtered.put(dimension.getKey(), dimension.getValue());
        }
    }
    return filtered;
}

private void purgeStaleMetrics() {
if (Duration.between(externalMetricsUpdateTime, Instant.now()).getSeconds() > EXTERNAL_METRICS_TTL.getSeconds()) {
purgeExtraMetrics();
Expand All @@ -189,6 +204,7 @@ private void purgeStaleMetrics() {

/**
 * Discards everything derived from external metrics: resets both harvested dimension maps
 * to empty maps and hands an empty packet list to externalMetrics.
 */
public void purgeExtraMetrics() {
extraDimensions = new HashMap<>();
extraHostDimensions = new HashMap<>();
externalMetrics.setExtraMetrics(List.of());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import ai.vespa.metricsproxy.core.ConfiguredMetric;
import ai.vespa.metricsproxy.core.MetricsConsumers;
import ai.vespa.metricsproxy.metric.dimensions.MetricDimensionMapping;
import ai.vespa.metricsproxy.metric.model.DimensionId;
import ai.vespa.metricsproxy.metric.model.MetricId;
import ai.vespa.metricsproxy.metric.model.MetricsPacket;
Expand Down Expand Up @@ -39,9 +40,16 @@ public class ExternalMetrics {

private volatile List<MetricsPacket.Builder> metrics = new ArrayList<>();
private final MetricsConsumers consumers;
private final MetricDimensionMapping dimensionMapping;

public ExternalMetrics(MetricsConsumers consumers) {
public ExternalMetrics(MetricsConsumers consumers, MetricDimensionMapping dimensionMapping) {
this.consumers = consumers;
this.dimensionMapping = dimensionMapping;
}

/**
 * Looks up the host dimensions the mapping allows on metrics from the given service.
 * Delegates to {@link MetricDimensionMapping#allowedFor(ServiceId)}.
 *
 * @param serviceId the service the metrics belong to
 * @return the allowed host dimensions for that service
 */
public Set<DimensionId> allowedHostDimensions(ServiceId serviceId) {
    Set<DimensionId> permitted = dimensionMapping.allowedFor(serviceId);
    return permitted;
}

public List<MetricsPacket.Builder> getMetrics() {
Expand All @@ -56,6 +64,7 @@ public void setExtraMetrics(List<MetricsPacket.Builder> externalPackets) {
externalPackets.forEach(packet -> packet.addConsumers(consumers.getAllConsumers())
.retainMetrics(metricsToRetain())
.applyOutputNames(outputNamesById()));
externalPackets.forEach(this::stripDisallowedHostDimensions);
metrics = List.copyOf(externalPackets);
}

Expand Down Expand Up @@ -92,4 +101,33 @@ public static Map<DimensionId, String> extractConfigserverDimensions(Collection<
return dimensions;
}

/**
 * Harvests, from the given packets, the dimensions that the metric-to-dimension mapping manages.
 * They are collected separately from {@link #extractConfigserverDimensions} (role, state) so that
 * they can be applied per metric through the mapping instead of globally.
 * When several packets carry the same dimension, no guarantee is given about which value is kept.
 *
 * @param packets the packet builders to harvest dimensions from
 * @return a new map with only the managed dimensions and their values
 */
public Map<DimensionId, String> extractHostDimensions(Collection<MetricsPacket.Builder> packets) {
    Map<DimensionId, String> harvested = new HashMap<>();
    packets.forEach(builder -> harvested.putAll(builder.build().dimensions()));
    // Keep only what the mapping manages; everything else is dropped from the result.
    harvested.keySet().retainAll(dimensionMapping.managedDimensions());
    return harvested;
}

/**
* Removes host dimensions not allowed for the packet's service, e.g. strips 'osVersion' from
* carrier packets (vespa.node etc.) so it only remains where the mapping allows it (host_life).
* Non-host dimensions (role, state, ...) are left untouched.
*/
private void stripDisallowedHostDimensions(MetricsPacket.Builder packet) {
Set<DimensionId> allowed = allowedHostDimensions(packet.getServiceId());
Set<DimensionId> managed = dimensionMapping.managedDimensions();
Set<DimensionId> retained = packet.getDimensionIds();
retained.removeIf(id -> managed.contains(id) && ! allowed.contains(id));

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This mutates retained, which is just the return value of packet.getDimensionIds(). This method shouldn't assume that return value is mutable.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair to flag, but I think this is within the method's documented contract rather than an assumption. getDimensionIds()'s javadoc says it

"Returns a modifiable copy of the dimension IDs of this builder, usually for use with retainDimensions(Collection)"

It hands back a fresh LinkedHashSet (not a view over the builder's internal map), intended for exactly this get → mutate → retainDimensions() pattern. So retained.removeIf(...) only mutates that copy, never the builder.

Existing block in PublicDimensionsProcessor does the same thing.

That said, if you'd rather this code not depend on that contract, I can wrap it in a local new HashSet<>(...).

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No that's fine. Reviewing such code is harder since the code isn't obviously correct. So usually code does the wrapping client-side. It would also have been fine if the method name had been clearer (e.g. getMutableDimensionIds())

packet.retainDimensions(retained);
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package ai.vespa.metricsproxy.metric.dimensions;

import ai.vespa.metricsproxy.metric.model.DimensionId;
import ai.vespa.metricsproxy.metric.model.ServiceId;

import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import static ai.vespa.metricsproxy.metric.model.DimensionId.toDimensionId;
import static ai.vespa.metricsproxy.metric.model.ServiceId.toServiceId;
import static java.util.stream.Collectors.toUnmodifiableMap;
import static java.util.stream.Collectors.toUnmodifiableSet;

/**
 * Tells which host dimensions are allowed on a metrics packet, based on the packet's service.
 * A service with an explicit entry gets exactly the dimensions listed for it; every other
 * service gets the default set. Used when harvesting host dimensions from external (host-admin)
 * packets and when keeping each such dimension only on the metrics the mapping allows.
 *
 * @author onur
 */
public class MetricDimensionMapping {

    private final Set<DimensionId> defaultDimensions;
    private final Map<ServiceId, Set<DimensionId>> dimensionsByService;
    private final Set<DimensionId> managedDimensions;

    public MetricDimensionMapping(MetricDimensionMappingConfig config) {
        this.defaultDimensions = toDimensionIds(config.defaultDimension());
        this.dimensionsByService = config.service().entrySet().stream()
                .collect(toUnmodifiableMap(service -> toServiceId(service.getKey()),
                                           service -> toDimensionIds(service.getValue().dimension())));

        // The managed set is the union of the defaults and every per-service list.
        Set<DimensionId> union = new HashSet<>(defaultDimensions);
        for (Set<DimensionId> perService : dimensionsByService.values()) {
            union.addAll(perService);
        }
        this.managedDimensions = Set.copyOf(union);
    }

    /** Returns the explicitly mapped host dimensions for the given service, or the default set if it has none. */
    public Set<DimensionId> allowedFor(ServiceId serviceId) {
        Set<DimensionId> fallback = defaultDimensions;
        return dimensionsByService.getOrDefault(serviceId, fallback);
    }

    /** Returns every dimension mentioned anywhere in the mapping; only these are harvested and stripped. */
    public Set<DimensionId> managedDimensions() {
        return managedDimensions;
    }

    /** Converts configured dimension names to an unmodifiable set of dimension ids. */
    private static Set<DimensionId> toDimensionIds(java.util.Collection<String> names) {
        return names.stream()
                    .map(DimensionId::toDimensionId)
                    .collect(toUnmodifiableSet());
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ private PublicDimensions() { }
// From host-admin, currently (Jan 2020) only included for 'vespa.node' metrics
public static final String HOSTNAME = "host";

// From host-admin. The physical host the node runs on.
public static final String PARENT_HOSTNAME = "parentHostname";

// From host-admin. The OS version of the physical host.
public static final String OS_VERSION = "osVersion";


/** Metric specific dimensions **/
public static final String API = "api"; // feed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ public Builder service(ServiceId service) {
return this;
}

/** Returns the service id currently held by this builder. */
public ServiceId getServiceId() {
    return service;
}

public Builder statusCode(Integer statusCode) {
if (statusCode != null) this.statusCode = statusCode;
return this;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ public List<MetricsPacket> gatherMetrics() {
.map(metricPacketBuilder ->
metricPacketBuilder.putDimensionsIfAbsent(applicationDimensions.getDimensions())
.putDimensionsIfAbsent(nodeDimensions.getDimensions())
.putDimensionsIfAbsent(metricsManager.getExtraDimensions()).build()
.putDimensionsIfAbsent(metricsManager.getExtraDimensions())
.putDimensionsIfAbsent(metricsManager.getExtraHostDimensions(metricPacketBuilder.getServiceId())).build()
).toList();
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package=ai.vespa.metricsproxy.metric.dimensions

# Host dimensions allowed on metrics whose service is not explicitly listed below.
defaultDimension[] string

# Per-service overrides, keyed by service name: the host dimensions allowed on metrics from that service.
service{}.dimension[] string
19 changes: 18 additions & 1 deletion metrics-proxy/src/test/java/ai/vespa/metricsproxy/TestUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import ai.vespa.metricsproxy.core.VespaMetrics;
import ai.vespa.metricsproxy.metric.ExternalMetrics;
import ai.vespa.metricsproxy.metric.dimensions.ApplicationDimensions;
import ai.vespa.metricsproxy.metric.dimensions.MetricDimensionMapping;
import ai.vespa.metricsproxy.metric.dimensions.MetricDimensionMappingConfig;
import ai.vespa.metricsproxy.metric.dimensions.NodeDimensions;
import ai.vespa.metricsproxy.service.VespaServices;

Expand All @@ -25,10 +27,25 @@ public static MetricsManager createMetricsManager(VespaServices vespaServices,
ApplicationDimensions applicationDimensions,
NodeDimensions nodeDimensions) {
VespaMetrics metrics = new VespaMetrics(consumers);
return new MetricsManager(vespaServices, metrics, new ExternalMetrics(consumers),
return new MetricsManager(vespaServices, metrics, new ExternalMetrics(consumers, standardDimensionMapping()),
applicationDimensions, nodeDimensions);
}

/**
 * Builds a metric-to-dimension mapping for tests: services without an explicit entry are allowed
 * 'host' and 'parentHostname', while 'host_life' is additionally allowed 'osVersion'.
 */
public static MetricDimensionMapping standardDimensionMapping() {
    MetricDimensionMappingConfig.Builder config = new MetricDimensionMappingConfig.Builder();
    config.defaultDimension("host");
    config.defaultDimension("parentHostname");
    config.service("host_life", hostLife -> hostLife
            .dimension("host")
            .dimension("parentHostname")
            .dimension("osVersion"));
    return new MetricDimensionMapping(config.build());
}

public static String getFileContents(String filename) {
InputStream in = TestUtil.class.getClassLoader().getResourceAsStream(filename);
if (in == null) {
Expand Down
Loading