diff --git a/TrafficCapture/dockerSolution/otelConfigs/configSnippets/awsCloudWatch.yaml b/TrafficCapture/dockerSolution/otelConfigs/configSnippets/awsCloudWatch.yaml index 871f84bb04..c9e80551c6 100644 --- a/TrafficCapture/dockerSolution/otelConfigs/configSnippets/awsCloudWatch.yaml +++ b/TrafficCapture/dockerSolution/otelConfigs/configSnippets/awsCloudWatch.yaml @@ -1,8 +1,30 @@ exporters: awsemf: namespace: 'OpenSearchMigrations' + dimension_rollup_option: NoDimensionRollup # Reduce number of metrics by only publishing with all dimensions + resource_to_telemetry_conversion: + enabled: true + +processors: + cumulativetodelta: + resource/metrics: + attributes: + - key: qualifier + value: ${env:QUALIFIER} + action: upsert + resource/remove_default_attributes: + attributes: + - key: telemetry.sdk.name + action: delete + - key: telemetry.sdk.version + action: delete + - key: telemetry.sdk.language + action: delete + - key: service.name + action: delete service: pipelines: metrics: - exporters: [ awsemf ] + processors: [resource/remove_default_attributes, resource/metrics, cumulativetodelta] + exporters: [ awsemf ] \ No newline at end of file diff --git a/TrafficCapture/dockerSolution/otelConfigs/configSnippets/base.yaml b/TrafficCapture/dockerSolution/otelConfigs/configSnippets/base.yaml index 20b605dfe1..0218c6f483 100644 --- a/TrafficCapture/dockerSolution/otelConfigs/configSnippets/base.yaml +++ b/TrafficCapture/dockerSolution/otelConfigs/configSnippets/base.yaml @@ -2,6 +2,9 @@ receivers: otlp: protocols: grpc: + endpoint: :4317 + http: + endpoint: :4318 processors: diff --git a/TrafficCapture/dockerSolution/otelConfigs/configSnippets/batchTraces.yaml b/TrafficCapture/dockerSolution/otelConfigs/configSnippets/batchTraces.yaml index a32aff59d0..0e6c94ecfa 100644 --- a/TrafficCapture/dockerSolution/otelConfigs/configSnippets/batchTraces.yaml +++ b/TrafficCapture/dockerSolution/otelConfigs/configSnippets/batchTraces.yaml @@ -1,4 +1,8 @@ +processors: + 
probabilistic_sampler/traces: + sampling_percentage: ${env:TRACE_SAMPLING_PERCENTAGE:-1} + service: pipelines: traces: - processors: [ batch ] + processors: [probabilistic_sampler/traces, batch] diff --git a/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugDetailed.yaml b/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugDetailed.yaml index 1568fb2764..2754f7d303 100644 --- a/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugDetailed.yaml +++ b/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugDetailed.yaml @@ -1,5 +1,5 @@ exporters: - logging: + debug: verbosity: detailed sampling_initial: 5 sampling_thereafter: 200 \ No newline at end of file diff --git a/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugInfo.yaml b/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugInfo.yaml index 760aabf66a..904f2484c9 100644 --- a/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugInfo.yaml +++ b/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugInfo.yaml @@ -1,5 +1,5 @@ exporters: - logging: + debug: verbosity: info sampling_initial: 5 sampling_thereafter: 200 \ No newline at end of file diff --git a/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugLogsDetailed.yaml b/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugLogsDetailed.yaml index 04c30b4a9b..f581a5cf8d 100644 --- a/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugLogsDetailed.yaml +++ b/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugLogsDetailed.yaml @@ -1,4 +1,4 @@ service: pipelines: logs: - exporters: [logging] \ No newline at end of file + exporters: [debug] \ No newline at end of file diff --git a/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugLogsInfo.yaml b/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugLogsInfo.yaml index 04c30b4a9b..f581a5cf8d 100644 --- a/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugLogsInfo.yaml 
+++ b/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugLogsInfo.yaml @@ -1,4 +1,4 @@ service: pipelines: logs: - exporters: [logging] \ No newline at end of file + exporters: [debug] \ No newline at end of file diff --git a/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugMetricsDetailed.yaml b/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugMetricsDetailed.yaml index d7b4f497cf..0816f7ef2a 100644 --- a/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugMetricsDetailed.yaml +++ b/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugMetricsDetailed.yaml @@ -1,4 +1,4 @@ service: pipelines: metrics: - exporters: [logging] \ No newline at end of file + exporters: [debug] \ No newline at end of file diff --git a/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugMetricsInfo.yaml b/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugMetricsInfo.yaml index d7b4f497cf..0816f7ef2a 100644 --- a/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugMetricsInfo.yaml +++ b/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugMetricsInfo.yaml @@ -1,4 +1,4 @@ service: pipelines: metrics: - exporters: [logging] \ No newline at end of file + exporters: [debug] \ No newline at end of file diff --git a/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugTracesDetailed.yaml b/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugTracesDetailed.yaml index c1e7f95414..02d9844401 100644 --- a/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugTracesDetailed.yaml +++ b/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugTracesDetailed.yaml @@ -1,4 +1,4 @@ service: pipelines: traces: - exporters: [logging] \ No newline at end of file + exporters: [debug] \ No newline at end of file diff --git a/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugTracesInfo.yaml b/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugTracesInfo.yaml index 
c1e7f95414..02d9844401 100644 --- a/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugTracesInfo.yaml +++ b/TrafficCapture/dockerSolution/otelConfigs/configSnippets/debugTracesInfo.yaml @@ -1,4 +1,4 @@ service: pipelines: traces: - exporters: [logging] \ No newline at end of file + exporters: [debug] \ No newline at end of file diff --git a/TrafficCapture/dockerSolution/otelConfigs/configSnippets/healthCheck.yaml b/TrafficCapture/dockerSolution/otelConfigs/configSnippets/healthCheck.yaml index 5e20967f14..983a57e9fc 100644 --- a/TrafficCapture/dockerSolution/otelConfigs/configSnippets/healthCheck.yaml +++ b/TrafficCapture/dockerSolution/otelConfigs/configSnippets/healthCheck.yaml @@ -1,5 +1,6 @@ extensions: health_check: + endpoint: :13133 service: extensions: [ health_check ] \ No newline at end of file diff --git a/TrafficCapture/dockerSolution/otelConfigs/configSnippets/prometheus.yaml b/TrafficCapture/dockerSolution/otelConfigs/configSnippets/prometheus.yaml index a2d885d64c..3f23919bc1 100644 --- a/TrafficCapture/dockerSolution/otelConfigs/configSnippets/prometheus.yaml +++ b/TrafficCapture/dockerSolution/otelConfigs/configSnippets/prometheus.yaml @@ -6,6 +6,14 @@ exporters: enable_open_metrics: true service: + telemetry: + metrics: + readers: + - pull: + exporter: + prometheus: + host: '0.0.0.0' + port: 8888 pipelines: metrics: exporters: [ prometheus ] \ No newline at end of file diff --git a/TrafficCapture/dockerSolution/src/main/docker/composeExtensions/configs/otel-config-everything.yaml b/TrafficCapture/dockerSolution/src/main/docker/composeExtensions/configs/otel-config-everything.yaml index 0f912d1630..7220f6546e 100644 --- a/TrafficCapture/dockerSolution/src/main/docker/composeExtensions/configs/otel-config-everything.yaml +++ b/TrafficCapture/dockerSolution/src/main/docker/composeExtensions/configs/otel-config-everything.yaml @@ -2,24 +2,49 @@ receivers: otlp: protocols: grpc: + endpoint: :4317 + http: + endpoint: :4318 processors: batch: 
timeout: 10s send_batch_size: 8192 send_batch_max_size: 10000 + probabilistic_sampler/traces: + sampling_percentage: ${env:TRACE_SAMPLING_PERCENTAGE:-1} + cumulativetodelta: + resource/metrics: + attributes: + - key: qualifier + value: ${env:QUALIFIER} + action: upsert + resource/remove_default_attributes: + attributes: + - key: telemetry.sdk.name + action: delete + - key: telemetry.sdk.version + action: delete + - key: telemetry.sdk.language + action: delete + - key: service.name + action: delete extensions: zpages: endpoint: :55679 pprof: endpoint: :1888 health_check: + endpoint: :13133 exporters: - logging: + debug: verbosity: detailed sampling_initial: 5 sampling_thereafter: 200 awsemf: namespace: 'OpenSearchMigrations' + dimension_rollup_option: NoDimensionRollup # Reduce number of metrics by only publishing with all dimensions + resource_to_telemetry_conversion: + enabled: true awsxray: index_all_attributes: true prometheus: @@ -36,13 +61,21 @@ service: pipelines: metrics: receivers: [otlp] - processors: [batch] - exporters: [logging, awsemf, prometheus] + processors: [batch, resource/remove_default_attributes, resource/metrics, cumulativetodelta] + exporters: [debug, awsemf, prometheus] traces: receivers: [otlp] - processors: [batch] - exporters: [logging, awsxray, otlp/jaeger] + processors: [probabilistic_sampler/traces, batch] + exporters: [debug, awsxray, otlp/jaeger] logs: receivers: [otlp] processors: - exporters: [logging] + exporters: [debug] + telemetry: + metrics: + readers: + - pull: + exporter: + prometheus: + host: '0.0.0.0' + port: 8888 diff --git a/TrafficCapture/dockerSolution/src/main/docker/composeExtensions/configs/otel-config-prometheus-jaeger.yaml b/TrafficCapture/dockerSolution/src/main/docker/composeExtensions/configs/otel-config-prometheus-jaeger.yaml index f3b804777a..5165ef38ae 100644 --- a/TrafficCapture/dockerSolution/src/main/docker/composeExtensions/configs/otel-config-prometheus-jaeger.yaml +++ 
b/TrafficCapture/dockerSolution/src/main/docker/composeExtensions/configs/otel-config-prometheus-jaeger.yaml @@ -2,19 +2,25 @@ receivers: otlp: protocols: grpc: + endpoint: :4317 + http: + endpoint: :4318 processors: batch: timeout: 10s send_batch_size: 8192 send_batch_max_size: 10000 + probabilistic_sampler/traces: + sampling_percentage: ${env:TRACE_SAMPLING_PERCENTAGE:-1} extensions: zpages: endpoint: :55679 pprof: endpoint: :1888 health_check: + endpoint: :13133 exporters: - logging: + debug: verbosity: detailed sampling_initial: 5 sampling_thereafter: 200 @@ -33,12 +39,20 @@ service: metrics: receivers: [otlp] processors: [batch] - exporters: [logging, prometheus] + exporters: [debug, prometheus] traces: receivers: [otlp] - processors: [batch] - exporters: [logging, otlp/jaeger] + processors: [probabilistic_sampler/traces, batch] + exporters: [debug, otlp/jaeger] logs: receivers: [otlp] processors: - exporters: [logging] + exporters: [debug] + telemetry: + metrics: + readers: + - pull: + exporter: + prometheus: + host: '0.0.0.0' + port: 8888 diff --git a/TrafficCapture/dockerSolution/src/main/docker/grafana/Dockerfile b/TrafficCapture/dockerSolution/src/main/docker/grafana/Dockerfile index 1d737abfb0..1944f5a7e8 100644 --- a/TrafficCapture/dockerSolution/src/main/docker/grafana/Dockerfile +++ b/TrafficCapture/dockerSolution/src/main/docker/grafana/Dockerfile @@ -1,3 +1,3 @@ -FROM grafana/grafana:11.1.0 +FROM grafana/grafana:11.6.1 COPY datasources.yaml /usr/share/grafana/conf/provisioning/datasources/ diff --git a/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/lib/console_link/tests/data/cloudwatch_list_metrics_response.json b/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/lib/console_link/tests/data/cloudwatch_list_metrics_response.json index 4dea112f17..448ca40c45 100644 --- a/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/lib/console_link/tests/data/cloudwatch_list_metrics_response.json +++ 
b/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/lib/console_link/tests/data/cloudwatch_list_metrics_response.json @@ -7,6 +7,10 @@ { "Name": "OTelLib", "Value": "captureProxy" + }, + { + "Name": "qualifier", + "Value": "dev" } ] }, @@ -17,6 +21,10 @@ { "Name": "OTelLib", "Value": "captureProxy" + }, + { + "Name": "qualifier", + "Value": "dev" } ] }, @@ -27,6 +35,10 @@ { "Name": "OTelLib", "Value": "replayer" + }, + { + "Name": "qualifier", + "Value": "dev" } ] } diff --git a/TrafficCapture/dockerSolution/src/main/docker/otelCollector/Dockerfile b/TrafficCapture/dockerSolution/src/main/docker/otelCollector/Dockerfile index 4f87619454..04cf6690b9 100644 --- a/TrafficCapture/dockerSolution/src/main/docker/otelCollector/Dockerfile +++ b/TrafficCapture/dockerSolution/src/main/docker/otelCollector/Dockerfile @@ -1,4 +1,4 @@ -FROM public.ecr.aws/aws-observability/aws-otel-collector:v0.38.0 +FROM public.ecr.aws/aws-observability/aws-otel-collector:v0.43.3 COPY otel-config*.yaml /etc/ CMD ["--config", "/etc/otel-config-debug-only.yaml"] diff --git a/TrafficCapture/dockerSolution/src/main/docker/otelCollector/otel-config-aws-debug.yaml b/TrafficCapture/dockerSolution/src/main/docker/otelCollector/otel-config-aws-debug.yaml index 67a5fd468d..37318366a9 100644 --- a/TrafficCapture/dockerSolution/src/main/docker/otelCollector/otel-config-aws-debug.yaml +++ b/TrafficCapture/dockerSolution/src/main/docker/otelCollector/otel-config-aws-debug.yaml @@ -2,20 +2,45 @@ receivers: otlp: protocols: grpc: + endpoint: :4317 + http: + endpoint: :4318 processors: batch: timeout: 10s send_batch_size: 8192 send_batch_max_size: 10000 + probabilistic_sampler/traces: + sampling_percentage: ${env:TRACE_SAMPLING_PERCENTAGE:-1} + cumulativetodelta: + resource/metrics: + attributes: + - key: qualifier + value: ${env:QUALIFIER} + action: upsert + resource/remove_default_attributes: + attributes: + - key: telemetry.sdk.name + action: delete + - key: telemetry.sdk.version + action: 
delete + - key: telemetry.sdk.language + action: delete + - key: service.name + action: delete extensions: health_check: + endpoint: :13133 exporters: - logging: + debug: verbosity: detailed sampling_initial: 5 sampling_thereafter: 200 awsemf: namespace: 'OpenSearchMigrations' + dimension_rollup_option: NoDimensionRollup # Reduce number of metrics by only publishing with all dimensions + resource_to_telemetry_conversion: + enabled: true awsxray: index_all_attributes: true service: @@ -23,13 +48,13 @@ service: pipelines: metrics: receivers: [otlp] - processors: [batch] - exporters: [logging, awsemf] + processors: [batch, resource/remove_default_attributes, resource/metrics, cumulativetodelta] + exporters: [debug, awsemf] traces: receivers: [otlp] - processors: [batch] - exporters: [logging, awsxray] + processors: [probabilistic_sampler/traces, batch] + exporters: [debug, awsxray] logs: receivers: [otlp] processors: - exporters: [logging] + exporters: [debug] diff --git a/TrafficCapture/dockerSolution/src/main/docker/otelCollector/otel-config-aws.yaml b/TrafficCapture/dockerSolution/src/main/docker/otelCollector/otel-config-aws.yaml index a8f440ec6f..8b57c6ac58 100644 --- a/TrafficCapture/dockerSolution/src/main/docker/otelCollector/otel-config-aws.yaml +++ b/TrafficCapture/dockerSolution/src/main/docker/otelCollector/otel-config-aws.yaml @@ -2,16 +2,41 @@ receivers: otlp: protocols: grpc: + endpoint: :4317 + http: + endpoint: :4318 processors: batch: timeout: 10s send_batch_size: 8192 send_batch_max_size: 10000 + probabilistic_sampler/traces: + sampling_percentage: ${env:TRACE_SAMPLING_PERCENTAGE:-1} + cumulativetodelta: + resource/metrics: + attributes: + - key: qualifier + value: ${env:QUALIFIER} + action: upsert + resource/remove_default_attributes: + attributes: + - key: telemetry.sdk.name + action: delete + - key: telemetry.sdk.version + action: delete + - key: telemetry.sdk.language + action: delete + - key: service.name + action: delete extensions: 
health_check: + endpoint: :13133 exporters: awsemf: namespace: 'OpenSearchMigrations' + dimension_rollup_option: NoDimensionRollup # Reduce number of metrics by only publishing with all dimensions + resource_to_telemetry_conversion: + enabled: true awsxray: index_all_attributes: true service: @@ -19,9 +44,9 @@ service: pipelines: metrics: receivers: [otlp] - processors: [batch] + processors: [batch, resource/remove_default_attributes, resource/metrics, cumulativetodelta] exporters: [awsemf] traces: receivers: [otlp] - processors: [batch] + processors: [probabilistic_sampler/traces, batch] exporters: [awsxray] diff --git a/TrafficCapture/dockerSolution/src/main/docker/otelCollector/otel-config-debug-only.yaml b/TrafficCapture/dockerSolution/src/main/docker/otelCollector/otel-config-debug-only.yaml index baea39abc2..8b02c1664b 100644 --- a/TrafficCapture/dockerSolution/src/main/docker/otelCollector/otel-config-debug-only.yaml +++ b/TrafficCapture/dockerSolution/src/main/docker/otelCollector/otel-config-debug-only.yaml @@ -2,6 +2,9 @@ receivers: otlp: protocols: grpc: + endpoint: :4317 + http: + endpoint: :4318 processors: extensions: zpages: @@ -9,8 +12,9 @@ extensions: pprof: endpoint: :1888 health_check: + endpoint: :13133 exporters: - logging: + debug: verbosity: detailed sampling_initial: 5 sampling_thereafter: 200 @@ -20,12 +24,12 @@ service: traces: receivers: [otlp] processors: - exporters: [logging] + exporters: [debug] metrics: receivers: [otlp] processors: - exporters: [logging] + exporters: [debug] logs: receivers: [otlp] processors: - exporters: [logging] + exporters: [debug] diff --git a/deployment/cdk/opensearch-service-migration/lib/components/capture-replay-dashboard.json b/deployment/cdk/opensearch-service-migration/lib/components/capture-replay-dashboard.json index 96db5083b4..ad948d1c81 100644 --- a/deployment/cdk/opensearch-service-migration/lib/components/capture-replay-dashboard.json +++ 
b/deployment/cdk/opensearch-service-migration/lib/components/capture-replay-dashboard.json @@ -17,6 +17,15 @@ "label": "Migration Stage", "defaultValue": "placeholder-stage", "visible": false + }, + { + "type": "pattern", + "pattern": "MA_QUALIFIER", + "inputType": "input", + "id": "MA_QUALIFIER", + "label": "Migration Qualifier", + "defaultValue": "placeholder-qualifier", + "visible": false } ], "widgets": [ @@ -39,8 +48,8 @@ "type": "metric", "properties": { "metrics": [ - [ "OpenSearchMigrations", "bytesRead", "OTelLib", "captureProxy", { "label": "Response Bytes", "region": "REGION", "id": "c1", "visible": false } ], - [ ".", "bytesWritten", ".", ".", { "label": "Request Bytes", "region": "REGION", "id": "c2", "visible": false } ], + [ "OpenSearchMigrations", "bytesRead", "qualifier", "MA_QUALIFIER", "OTelLib", "captureProxy", { "label": "Response Bytes", "region": "REGION", "id": "c1", "visible": false } ], + [ "OpenSearchMigrations", "bytesWritten", "qualifier", "MA_QUALIFIER", "OTelLib", "captureProxy", { "label": "Request Bytes", "region": "REGION", "id": "c2", "visible": false } ], [ { "expression": "(c1 + c2) / 1048576 / PERIOD(c1)", "label": "Proxy", "id": "e1", "region": "REGION", "color": "#7f7f7f" } ], [ { "expression": "SEARCH('{AWS/Kafka,\"Broker ID\",\"Cluster Name\",Topic} Topic=\"logging-traffic-topic\" MetricName=\"BytesInPerSec\" \"Cluster Name\"=\"migration-msk-cluster-MA_STAGE\"', 'Average', 60)", "id": "k1", "region": "REGION", "visible": false, "label": "Kafka Bytes In Per Second", "period": 60 } ], [ { "expression": "SUM(k1) / 1048576", "label": "Kafka", "id": "k2", "region": "REGION" } ], @@ -82,7 +91,7 @@ "properties": { "metrics": [ [ { "expression": "IF(m2 > 0, 1, 0)", "label": "CommitsObserved", "id": "e1", "region": "REGION", "period": 300 } ], - [ "OpenSearchMigrations", "kafkaCommitCount", "OTelLib", "replayer", { "id": "m2", "region": "REGION", "visible": false } ] + [ "OpenSearchMigrations", "kafkaCommitCount", "qualifier", "MA_QUALIFIER", "OTelLib", 
"replayer", { "id": "m2", "region": "REGION", "visible": false } ] ], "sparkline": true, "view": "timeSeries", @@ -165,10 +174,8 @@ "type": "metric", "properties": { "metrics": [ - [ "OpenSearchMigrations", "lagBetweenSourceAndTargetRequests", "OTelLib", "replayer", { "id": "m1", "region": "REGION", "stat": "Minimum", "visible": false, "label": "Min", "color": "#9edae5" } ], - [ "...", { "id": "m4", "region": "REGION", "visible": false, "label": "Avg", "color": "#dbdb8d" } ], - [ "...", { "id": "m5", "region": "REGION", "label": "Max", "stat": "Maximum", "visible": false, "color": "#c7c7c7" } ], - [ { "expression": "FLOOR(METRICS()/1000)/60", "label": "Replay Lag", "id": "e2", "region": "REGION", "color": "#9edae5", "period": 300 } ] + [ "OpenSearchMigrations", "lagBetweenSourceAndTargetRequests", "qualifier", "MA_QUALIFIER", "OTelLib", "replayer", { "id": "m4", "region": "REGION", "label": "Avg", "color": "#dbdb8d", "visible": false } ], + [ { "expression": "FLOOR(METRICS()/1000)/60", "label": "Replay Lag", "id": "e2", "region": "REGION", "color": "#9edae5", "period": 60 } ] ], "view": "timeSeries", "title": "Replayer Performance Analysis", @@ -180,7 +187,7 @@ } }, "stat": "Average", - "period": 300, + "period": 60, "liveData": true } }, @@ -192,7 +199,7 @@ "type": "metric", "properties": { "metrics": [ - [ { "expression": "SEARCH('{OpenSearchMigrations,OTelLib,method,sourceStatusCode,statusCodesMatch,targetStatusCode} statusCodesMatch=\"true\"', 'Sum', 60)", "id": "e2", "period": 60, "region": "REGION", "label": "" } ] + [ { "expression": "SEARCH('{OpenSearchMigrations,OTelLib, method, qualifier, sourceStatusCode, statusCodesMatch, targetStatusCode} qualifier=\"MA_QUALIFIER\" statusCodesMatch=\"true\"', 'Sum', 60)", "id": "e2", "period": 60, "region": "REGION", "label": "${PROP('Dim.method')} Status code class: ${PROP('Dim.sourceStatusCode')}" } ] ], "sparkline": true, "view": "timeSeries", @@ -220,15 +227,9 @@ "type": "metric", "properties": { "metrics": [ - [ { 
"expression": "FLOOR(m1_avg/1000)", "label": "Lag (sec) - Avg", "id": "m3_avg", "visible": false, "region": "REGION", "stat": "Average", "period": 300 } ], - [ { "expression": "IF(DIFF_TIME(m3_avg) > PERIOD(m3_avg), 0/0, -1*RATE(m3_avg)+1)", "label": "Speedup Factor - Avg", "id": "m4_avg", "region": "REGION", "stat": "Average", "period": 300 } ], - [ "OpenSearchMigrations", "lagBetweenSourceAndTargetRequests", "OTelLib", "replayer", { "id": "m1_avg", "region": "REGION", "visible": false } ], - [ { "expression": "FLOOR(m1_min/1000)", "label": "Lag (sec) - Min", "id": "m3_min", "visible": false, "region": "REGION", "stat": "Average", "period": 300 } ], - [ { "expression": "IF(DIFF_TIME(m3_min) > PERIOD(m3_min), 0/0, -1*RATE(m3_min)+1)", "label": "Speedup Factor - Min", "id": "m4_min", "region": "REGION", "stat": "Average", "period": 300 } ], - [ "OpenSearchMigrations", "lagBetweenSourceAndTargetRequests", "OTelLib", "replayer", { "id": "m1_min", "region": "REGION", "visible": false, "stat": "Minimum" } ], - [ { "expression": "FLOOR(m1_max/1000)", "label": "Lag (sec) - Max", "id": "m3_max", "visible": false, "region": "REGION", "stat": "Average", "period": 300 } ], - [ { "expression": "IF(DIFF_TIME(m3_max) > PERIOD(m3_max), 0/0, -1*RATE(m3_max)+1)", "label": "Speedup Factor - Max", "id": "m4_max", "region": "REGION", "stat": "Average", "period": 300, "visible": false } ], - [ "OpenSearchMigrations", "lagBetweenSourceAndTargetRequests", "OTelLib", "replayer", { "id": "m1_max", "region": "REGION", "visible": false, "stat": "Maximum" } ] + [ { "expression": "FLOOR(m1_avg/1000)", "label": "Lag (sec) - Avg", "id": "m3_avg", "visible": false, "region": "REGION", "stat": "Average", "period": 60 } ], + [ { "expression": "IF(DIFF_TIME(m3_avg) > PERIOD(m3_avg), 0/0, -1*RATE(m3_avg)+1)", "label": "Speedup Factor - Avg", "id": "m4_avg", "region": "REGION", "stat": "Average", "period": 60 } ], + [ "OpenSearchMigrations", "lagBetweenSourceAndTargetRequests", "qualifier", 
"MA_QUALIFIER", "OTelLib", "replayer", { "id": "m1_avg", "region": "REGION", "visible": false } ] ], "view": "timeSeries", "title": "Actual Speedup Factor", @@ -241,7 +242,7 @@ } }, "stat": "Average", - "period": 300 + "period": 60 } }, { @@ -252,7 +253,7 @@ "type": "metric", "properties": { "metrics": [ - [ "OpenSearchMigrations", "bytesReadFromTarget", "OTelLib", "replayer", { "label": "Response Bytes", "region": "REGION", "id": "c1", "visible": false } ], - [ ".", "bytesWrittenToTarget", ".", ".", { "label": "Request Bytes", "region": "REGION", "id": "c2", "visible": false } ], + [ "OpenSearchMigrations", "bytesReadFromTarget", "qualifier", "MA_QUALIFIER", "OTelLib", "replayer", { "label": "Response Bytes", "region": "REGION", "id": "c1", "visible": false } ], + [ ".", "bytesWrittenToTarget", ".", ".", ".", ".", { "label": "Request Bytes", "region": "REGION", "id": "c2", "visible": false } ], [ { "expression": "(c1 + c2) / (1024*1024) / PERIOD(c2)", "label": "Total", "id": "e1", "region": "REGION" } ], [ { "expression": "(c2) / (1024*1024) / PERIOD(c2)", "label": "Request", "id": "e2", "region": "REGION" } ], @@ -281,12 +282,12 @@ "type": "metric", "properties": { "metrics": [ - [ { "expression": "SEARCH('{OpenSearchMigrations,OTelLib,method,sourceStatusCode,statusCodesMatch,targetStatusCode} statusCodesMatch=\"false\"', 'Sum', 300)", "id": "e2", "period": 300, "region": "REGION" } ] + [ { "expression": "SEARCH('{OpenSearchMigrations,OTelLib,method,qualifier,sourceStatusCode,statusCodesMatch,targetStatusCode} qualifier=\"MA_QUALIFIER\" statusCodesMatch=\"false\" MetricName=\"tupleComparison\"', 'Sum', 60)", "id": "e2", "period": 60, "region": "REGION", "label": "${PROP('Dim.method')} Status code class source: ${PROP('Dim.sourceStatusCode')} target: ${PROP('Dim.targetStatusCode')}" } ] ], "sparkline": true, "view": "timeSeries", "region": "REGION", - "period": 300, + "period": 60, "stat": "Sum", "title": "Replayer Status Code Mismatches", "setPeriodToTimeRange": true, 
@@ -335,13 +336,12 @@ "properties": { "metrics": [ [ { "expression": "lag_avg_ms/1000", "label": "Expression3", "id": "e3", 
"region": "REGION", "stat": "Average", "period": 300, "visible": false } ], - [ "OpenSearchMigrations", "lagBetweenSourceAndTargetRequests", "OTelLib", "replayer", { "id": "lag_avg_ms", "region": "REGION", "visible": false, "label": "lag_avg_ms" } ], + [ "OpenSearchMigrations", "lagBetweenSourceAndTargetRequests", "qualifier", "MA_QUALIFIER", "OTelLib", "replayer", { "id": "lag_avg_ms", "region": "REGION", "label": "lag_avg_ms", "visible": false } ], [ { "expression": "lag_avg_ms/1000", "label": "lag_avg_sec", "id": "lag_avg_sec", "region": "REGION", "stat": "Average", "period": 300, "visible": false } ], - [ { "expression": "FLOOR(lag_avg_sec/60/5 + 0.5)*60*5", "label": "lag_avg_sec_cleaned", "id": "lag_avg_sec_cleaned", "visible": false, "region": "REGION", "stat": "Average", "period": 300 } ], - [ { "expression": "lag_avg_sec/3600", "label": "lag_avg_hr", "id": "lag_avg_hr", "visible": false, "region": "REGION", "stat": "Average", "period": 300 } ], - [ { "expression": "-1*RATE(lag_avg_sec_cleaned)+1", "label": "Speedup Factor", "id": "speedup", "region": "REGION", "stat": "Average", "period": 300, "visible": false } ], - [ { "expression": "IF(DIFF_TIME(speedup) > PERIOD(speedup), 0/0, speedup)", "label": "Speedup Factor (cleaned)", "id": "speedup_cleaned", "region": "REGION", "period": 300, "visible": false } ], - [ { "expression": "lag_avg_hr/(speedup_cleaned-1)", "label": "catchup_hours", "id": "catchup_hours", "region": "REGION", "period": 300 } ] + [ { "expression": "lag_avg_sec/3600", "label": "lag_avg_hr", "id": "lag_avg_hr", "region": "REGION", "stat": "Average", "period": 300, "visible": false } ], + [ { "expression": "-1*RATE(lag_avg_sec)+1", "label": "Speedup Factor", "id": "speedup", "region": "REGION", "stat": "Average", "period": 300, "visible": false } ], + [ { "expression": "IF(DIFF_TIME(speedup) > PERIOD(speedup), speedup, speedup)", "label": "Speedup Factor (cleaned)", "id": "speedup_cleaned", "region": "REGION", "period": 300, "visible": false 
} ], + [ { "expression": "IF(speedup_cleaned > lag_avg_hr*60 , 0, IF(speedup_cleaned<=1, 0/0, lag_avg_hr/(speedup_cleaned-1)))", "label": "catchup_hours", "id": "catchup_hours", "region": "REGION", "period": 300 } ] ], "view": "singleValue", "title": "Time to catchup in hours", @@ -358,7 +358,7 @@ "setPeriodToTimeRange": false, "singleValueFullPrecision": false, "sparkline": true, - "stacked": true, + "stacked": false, "liveData": false } }, @@ -370,12 +370,12 @@ "type": "metric", "properties": { "metrics": [ - [ "OpenSearchMigrations", "bytesReadFromTarget", "OTelLib", "replayer", { "label": "Response Bytes", "region": "REGION", "id": "r1", "visible": false } ], - [ ".", "bytesWrittenToTarget", ".", ".", { "label": "Request Bytes", "region": "REGION", "id": "r2", "visible": false } ], + [ "OpenSearchMigrations", "bytesReadFromTarget", "qualifier", "MA_QUALIFIER", "OTelLib", "replayer", { "label": "Response Bytes", "region": "REGION", "id": "r1", "visible": false } ], + [ ".", "bytesWrittenToTarget", ".", ".", ".", ".", { "label": "Request Bytes", "region": "REGION", "id": "r2", "visible": false } ], [ { "expression": "(r1 + r2) / (1024*1024) / PERIOD(r2)", "label": "Replayer", "id": "e1", "region": "REGION" } ], [ { "expression": "(r2) / (1024*1024) / PERIOD(r2)", "label": "Request", "id": "e2", "region": "REGION", "visible": false } ], [ { "expression": "(r1)/ (1024*1024) / PERIOD(r1)", "label": "Response", "id": "e3", "region": "REGION", "visible": false } ], - [ "OpenSearchMigrations", "bytesRead", "OTelLib", "captureProxy", { "label": "Response Bytes", "region": "REGION", "id": "c1", "visible": false } ], - [ ".", "bytesWritten", ".", ".", { "label": "Request Bytes", "region": "REGION", "id": "c2", "visible": false } ], + [ "OpenSearchMigrations", "bytesRead", "qualifier", "MA_QUALIFIER", "OTelLib", "captureProxy", { "label": "Response Bytes", "region": "REGION", "id": "c1", "visible": false } ], + [ ".", "bytesWritten", ".", ".", ".", ".", { "label": "Request 
Bytes", "region": "REGION", "id": "c2", "visible": false } ], [ { "expression": "(c1 + c2) / 1048576 / PERIOD(c1)", "label": "Proxy", "id": "e4", "region": "REGION" } ] ], diff --git 
a/deployment/cdk/opensearch-service-migration/lib/components/reindex-from-snapshot-dashboard.json b/deployment/cdk/opensearch-service-migration/lib/components/reindex-from-snapshot-dashboard.json index c498b98081..07dae1e946 100644 --- a/deployment/cdk/opensearch-service-migration/lib/components/reindex-from-snapshot-dashboard.json +++ b/deployment/cdk/opensearch-service-migration/lib/components/reindex-from-snapshot-dashboard.json @@ -6,26 +6,15 @@ "inputType": "input", "id": "REGION", "label": "Region", - "defaultValue": "us-east-1", + "defaultValue": "placeholder-region", "visible": false }, - { - "type": "property", - "property": "DomainName", - "inputType": "select", - "id": "TC_DOMAIN_NAME", - "label": "Target Cluster Domain Name", - "search": "{AWS/ES,ClientId,DomainName} MetricName=\"CPUUtilization\"", - "populateFrom": "DomainName", - "defaultValue": "placeholder-name", - "visible": true - }, { "type": "pattern", "pattern": "MA_STAGE", "inputType": "input", "id": "MA_STAGE", - "label": "Migration Assistant Stage", + "label": "Migration Stage", "defaultValue": "placeholder-stage", "visible": false }, @@ -37,6 +26,15 @@ "label": "Account ID", "defaultValue": "ACCOUNT_ID", "visible": false + }, + { + "type": "pattern", + "pattern": "MA_QUALIFIER", + "inputType": "input", + "id": "MA_QUALIFIER", + "label": "Migration Qualifier", + "defaultValue": "placeholder-qualifier", + "visible": false } ], "widgets": [ @@ -218,7 +216,7 @@ "type": "metric", "properties": { "metrics": [ - [ "OpenSearchMigrations", "bytesSent", "OTelLib", "documentMigration", { "region": "REGION", "label": "Bytes Sent - MIN - ${MIN}, MAX - ${MAX}, AVG - ${AVG}" } ] + [ "OpenSearchMigrations", "bytesSent", "qualifier", "MA_QUALIFIER", "OTelLib", "documentMigration", { "region": "REGION", "label": "Bytes Sent - MIN - ${MIN}, MAX - ${MAX}, AVG - ${AVG}" } ] ], "period": 60, "region": "REGION", diff --git a/deployment/cdk/opensearch-service-migration/lib/constructs/migration-dashboard.ts 
b/deployment/cdk/opensearch-service-migration/lib/constructs/migration-dashboard.ts index 74ccbea132..3a7ee87aa2 100644 --- a/deployment/cdk/opensearch-service-migration/lib/constructs/migration-dashboard.ts +++ b/deployment/cdk/opensearch-service-migration/lib/constructs/migration-dashboard.ts @@ -36,7 +36,8 @@ export class MigrationDashboard extends Construct { const variableSetters = { 'ACCOUNT_ID': props.account, 'REGION': props.region, - 'MA_STAGE': props.stage + 'MA_STAGE': props.stage, + 'MA_QUALIFIER': props.stage }; Object.entries(variableSetters).forEach(([varName, value]) => { diff --git a/deployment/cdk/opensearch-service-migration/lib/service-stacks/migration-otel-collector-sidecar.ts b/deployment/cdk/opensearch-service-migration/lib/service-stacks/migration-otel-collector-sidecar.ts index 6766dfd2e5..5e534a8511 100644 --- a/deployment/cdk/opensearch-service-migration/lib/service-stacks/migration-otel-collector-sidecar.ts +++ b/deployment/cdk/opensearch-service-migration/lib/service-stacks/migration-otel-collector-sidecar.ts @@ -1,8 +1,9 @@ import { + AwsLogDriverMode, + ContainerDependencyCondition, + LogDrivers, PortMapping, Protocol, - LogDrivers, - AwsLogDriverMode, TaskDefinition } from "aws-cdk-lib/aws-ecs"; import {LogGroup, RetentionDays} from "aws-cdk-lib/aws-logs"; @@ -18,7 +19,7 @@ export class OtelCollectorSidecar { return "http://localhost:" + OtelCollectorSidecar.OTEL_CONTAINER_PORT; } - static addOtelCollectorContainer(taskDefinition: TaskDefinition, logGroupPrefix: string) { + static addOtelCollectorContainer(taskDefinition: TaskDefinition, logGroupPrefix: string, stage: string) { const otelCollectorPort: PortMapping = { name: "otel-collector-connect", hostPort: this.OTEL_CONTAINER_PORT, @@ -48,6 +49,10 @@ export class OtelCollectorSidecar { logGroup: serviceLogGroup, mode: AwsLogDriverMode.BLOCKING, }), + environment: { + "QUALIFIER": stage, + "TRACE_SAMPLING_PERCENTAGE": "1" + }, essential: true, healthCheck: { command: ["CMD", 
"/healthcheck"], @@ -58,6 +63,10 @@ export class OtelCollectorSidecar { } }); taskDefinition.addToTaskRolePolicy(createAwsDistroForOtelPushInstrumentationPolicy()); + taskDefinition.defaultContainer?.addContainerDependencies({ + container: otelCollectorContainer, + condition: ContainerDependencyCondition.HEALTHY + }); return otelCollectorContainer; } diff --git a/deployment/cdk/opensearch-service-migration/lib/service-stacks/migration-service-core.ts b/deployment/cdk/opensearch-service-migration/lib/service-stacks/migration-service-core.ts index 0ad1130de4..4f0e0e6586 100644 --- a/deployment/cdk/opensearch-service-migration/lib/service-stacks/migration-service-core.ts +++ b/deployment/cdk/opensearch-service-migration/lib/service-stacks/migration-service-core.ts @@ -151,7 +151,7 @@ export class MigrationServiceCore extends Stack { } if (props.otelCollectorEnabled) { - OtelCollectorSidecar.addOtelCollectorContainer(serviceTaskDef, serviceLogGroup.logGroupName); + OtelCollectorSidecar.addOtelCollectorContainer(serviceTaskDef, serviceLogGroup.logGroupName, props.stage); } const fargateService = new FargateService(this, "ServiceFargateService", {