
Commit 652f8ca

Merge branch 'main' into split/microsoft-common-schema-processor
2 parents 626e4ca + f018901 commit 652f8ca

45 files changed

Lines changed: 7831 additions & 2535 deletions


.github/copilot-instructions.md

Lines changed: 0 additions & 4 deletions
This file was deleted.

.github/workflows/pipeline-perf-on-label.yaml

Lines changed: 6 additions & 65 deletions
@@ -1,8 +1,8 @@
-# This action runs the pipeline perf continuous benchmarking suite on every PR.
-# - With 'pipelineperf' label: runs on dedicated Oracle bare-metal hardware for accurate benchmarks
-# - Without label: runs on ubuntu-latest for basic validation
-# In either case, the results does not update the charts.
-name: Pipeline Perf Pre-Merge
+# This action runs the pipeline perf benchmarking suite on dedicated Oracle
+# bare-metal hardware when the 'pipelineperf' label is added to a PR.
+# Basic perf validation on ubuntu-latest is handled by Rust-CI (rust-ci.yml).
+# The results from this workflow do not update the charts.
+name: Pipeline Perf Dedicated

 on:
   pull_request:
@@ -18,29 +18,9 @@ concurrency:
   cancel-in-progress: true

 jobs:
-  # Check for the pipelineperf label to determine which runner to use
-  label-check:
-    name: Check for pipelineperf label
-    runs-on: ubuntu-latest
-    outputs:
-      has_label: ${{ steps.check_label.outputs.has_label }}
-    steps:
-      - name: Check if PR has 'pipelineperf' label
-        id: check_label
-        run: |
-          labels=$(echo '${{ toJson(github.event.pull_request.labels) }}' | jq -r '.[].name')
-          if echo "$labels" | grep -q "pipelineperf"; then
-            echo "Label pipelineperf found - will use dedicated hardware"
-            echo "has_label=true" >> $GITHUB_OUTPUT
-          else
-            echo "Label 'pipelineperf' not found - will use ubuntu-latest"
-            echo "has_label=false" >> $GITHUB_OUTPUT
-          fi
-
   # Run on dedicated Oracle hardware when 'pipelineperf' label is present
   pipeline-perf-test-dedicated:
-    needs: label-check
-    if: needs.label-check.outputs.has_label == 'true'
+    if: contains(github.event.pull_request.labels.*.name, 'pipelineperf')
     runs-on: oracle-bare-metal-64cpu-1024gb-x86-64-ubuntu-24
     steps:
      - name: Harden the runner (Audit all outbound calls)
@@ -122,42 +102,3 @@ jobs:
          echo ""
          echo "=== Docker disk usage ==="
          docker system df -v 2>/dev/null || true
-
-  # Run on ubuntu-latest for basic validation when no label is present
-  pipeline-perf-test-basic:
-    needs: label-check
-    if: needs.label-check.outputs.has_label == 'false'
-    runs-on: ubuntu-latest
-    steps:
-      - name: Harden the runner (Audit all outbound calls)
-        uses: step-security/harden-runner@fe104658747b27e96e4f7e80cd0a94068e53901d # v2.16.1
-        with:
-          egress-policy: audit
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-
-      - name: Set up Python
-        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
-        with:
-          python-version: "3.14"
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0
-
-      - name: Build dataflow_engine
-        run: |
-          git submodule init
-          git submodule update
-          cd rust/otap-dataflow
-          docker buildx build --load --build-context otel-arrow=../../ -f Dockerfile -t df_engine .
-          cd ../..
-
-      - name: Install dependencies
-        run: |
-          python -m pip install --user --require-hashes -r tools/pipeline_perf_test/orchestrator/requirements.lock.txt
-          python -m pip install --user --require-hashes -r tools/pipeline_perf_test/load_generator/requirements.lock.txt
-
-      - name: Run pipeline performance test suite
-        run: |
-          cd tools/pipeline_perf_test
-          python orchestrator/run_orchestrator.py --config test_suites/integration/continuous/100klrps-docker.yaml
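
The change above collapses the old two-job arrangement into a single job gated directly on the PR label: the separate label-check job, its jq parsing, and the GITHUB_OUTPUT handoff are replaced by one job-level `if:` expression. A minimal sketch of that pattern, with a hypothetical workflow name, job name, and label; the `types:` trigger list is an assumption and not part of this commit:

name: Example Label Gate
on:
  pull_request:
    types: [opened, synchronize, labeled]  # assumed; 'labeled' re-evaluates the gate when the label is added
jobs:
  heavy-benchmark:
    # Runs only when the PR carries the 'perf' label; otherwise the job is skipped.
    if: contains(github.event.pull_request.labels.*.name, 'perf')
    runs-on: ubuntu-latest
    steps:
      - run: echo "label present - running the expensive benchmark"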

.github/workflows/rust-ci.yml

Lines changed: 37 additions & 0 deletions
@@ -737,6 +737,38 @@ jobs:
          reporter: java-junit
          fail-on-error: false

+  # Pipeline performance test - validates that Rust changes don't regress performance.
+  pipeline_perf_test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          submodules: true
+      - name: Set up Python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        with:
+          python-version: "3.14"
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0
+      - name: Free disk space
+        run: |
+          sudo rm -rf /usr/lib/jvm /usr/share/dotnet /usr/share/swift /usr/local/.ghcup
+          sudo rm -rf /usr/local/julia* /usr/local/lib/android /usr/local/share/chromium
+          sudo rm -rf /opt/microsoft /opt/google /opt/az /usr/local/share/powershell
+      - name: Build dataflow_engine
+        run: |
+          cd rust/otap-dataflow
+          docker buildx build --load --build-context otel-arrow=../../ -f Dockerfile -t df_engine .
+          cd ../..
+      - name: Install dependencies
+        run: |
+          python -m pip install --user --require-hashes -r tools/pipeline_perf_test/orchestrator/requirements.lock.txt
+          python -m pip install --user --require-hashes -r tools/pipeline_perf_test/load_generator/requirements.lock.txt
+      - name: Run pipeline performance test suite
+        run: |
+          cd tools/pipeline_perf_test
+          python orchestrator/run_orchestrator.py --config test_suites/integration/continuous/100klrps-docker.yaml
+
   # Aggregated status check - depends only on the required matrix combinations.
   # Add/remove jobs from the needs list to change what is required via PR,
   # rather than updating GitHub branch protection settings directly.
@@ -753,6 +785,7 @@ jobs:
      - compile_proto
      - pest-fmt
      - no_default_features_check
+      - pipeline_perf_test
    steps:
      - name: Check if all required jobs succeeded
        run: |
@@ -788,4 +821,8 @@ jobs:
            echo "no_default_features_check failed or was cancelled"
            exit 1
          fi
+          if [[ "${{ needs.pipeline_perf_test.result }}" != "success" ]]; then
+            echo "pipeline_perf_test failed or was cancelled"
+            exit 1
+          fi
          echo "All required checks passed!"

AGENTS.md

Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
+# Agent Instructions
+
+If working on Rust code (i.e., the `rust/` directory), read and follow all
+instructions in [rust/otap-dataflow/AGENTS.md](rust/otap-dataflow/AGENTS.md).

CLAUDE.md

Lines changed: 1 addition & 2 deletions
@@ -1,4 +1,3 @@
 # CLAUDE.md

-If working on Rust code (i.e., the `rust/` directory), read and follow all
-instructions in [rust/otap-dataflow/AGENTS.md](rust/otap-dataflow/AGENTS.md).
+See [AGENTS.md](AGENTS.md).

README.md

Lines changed: 1 addition & 1 deletion
@@ -211,7 +211,6 @@ repository](https://github.com/open-telemetry/community/blob/main/guides/contrib

 - [Cijo Thomas](https://github.com/cijothomas), Microsoft
 - [Lalit Kumar Bhasin](https://github.com/lalitb), Microsoft
-- [Lei Huang](https://github.com/v0y4g3r), Greptime
 - [Utkarsh Umesan Pillai](https://github.com/utpilla), Microsoft

 For more information about the approver role, see the [community
@@ -227,6 +226,7 @@ repository](https://github.com/open-telemetry/community/blob/main/guides/contrib
 ### Emeritus

 - [Alex Boten](https://github.com/codeboten), Approver
+- [Lei Huang](https://github.com/v0y4g3r), Approver
 - [Moh Osman](https://github.com/moh-osman3), Approver

 ### Thanks to all of our contributors

rust/otap-dataflow/README.md

Lines changed: 2 additions & 2 deletions
@@ -187,8 +187,8 @@ A simple component to produce synthetic data from semantic convention registries
 #### Batch processor

 A batching processor that works directly with OTAP records. This is
-[based on lower-level support in the `otal_arrow_rust`
-crate](../otel-arrow-rust/src/otap/batching.rs).
+[based on lower-level support in the `otap-df-pdata`
+crate](./crates/pdata/src/otap/batching.rs).

 #### OTAP exporter

rust/otap-dataflow/benchmarks/benches/attribute_transform/main.rs

Lines changed: 17 additions & 3 deletions
@@ -35,22 +35,27 @@ fn generate_native_keys_attr_batch(
     key_gen: impl Fn(usize) -> String,
 ) -> RecordBatch {
     let mut keys_arr = StringBuilder::new();
+    let mut parent_ids = Vec::new();
     for i in 0..num_rows {
         let attr_key = key_gen(i);
         keys_arr.append_value(attr_key);
+        parent_ids.push((i % 10) as u16);
     }
     let keys_arr = keys_arr.finish();
+    let parent_ids = UInt16Array::from(parent_ids);

     let type_arr = UInt8Array::from_iter_values(std::iter::repeat_n(
         AttributeValueType::Empty as u8,
         keys_arr.len(),
     ));
+
     RecordBatch::try_new(
         Arc::new(Schema::new(vec![
+            Field::new(consts::PARENT_ID, DataType::UInt16, false).with_plain_encoding(),
             Field::new(consts::ATTRIBUTE_TYPE, DataType::UInt8, false),
             Field::new(consts::ATTRIBUTE_KEY, DataType::Utf8, false),
         ])),
-        vec![Arc::new(type_arr), Arc::new(keys_arr)],
+        vec![Arc::new(parent_ids), Arc::new(type_arr), Arc::new(keys_arr)],
     )
     .expect("expect no error")
 }
@@ -62,18 +67,24 @@ fn generate_dict_keys_attribute_batch(
 ) -> RecordBatch {
     let mut keys_dict_values_arr = StringBuilder::new();
     let mut keys_dict_keys_arr = PrimitiveBuilder::<UInt16Type>::new();
+    let mut parent_ids = Vec::new();
     for i in 0..num_keys {
         let attr_key = key_gen(i);
         keys_dict_values_arr.append_value(attr_key);
         keys_dict_keys_arr.append_value_n(i as u16, rows_per_key);
+        for j in 0..rows_per_key {
+            parent_ids.push(((i * rows_per_key + j) % 10) as u16);
+        }
     }

     let keys_arr = DictionaryArray::new(
         keys_dict_keys_arr.finish(),
         Arc::new(keys_dict_values_arr.finish()),
     );
+    let parent_ids = UInt16Array::from(parent_ids);

     let schema = Arc::new(Schema::new(vec![
+        Field::new(consts::PARENT_ID, DataType::UInt16, false).with_plain_encoding(),
         Field::new(consts::ATTRIBUTE_TYPE, DataType::UInt8, false),
         Field::new(
             consts::ATTRIBUTE_KEY,
@@ -85,8 +96,11 @@ fn generate_dict_keys_attribute_batch(
         AttributeValueType::Empty as u8,
         keys_arr.len(),
     ));
-    RecordBatch::try_new(schema, vec![Arc::new(type_arr), Arc::new(keys_arr)])
-        .expect("expect no error")
+    RecordBatch::try_new(
+        schema,
+        vec![Arc::new(parent_ids), Arc::new(type_arr), Arc::new(keys_arr)],
+    )
+    .expect("expect no error")
 }

 fn bench_transform_attributes(c: &mut Criterion) {

rust/otap-dataflow/crates/config/src/node.rs

Lines changed: 5 additions & 5 deletions
@@ -11,7 +11,7 @@
 use crate::error::Error;
 use crate::pipeline::telemetry::{AttributeValue, TelemetryAttribute};
 use crate::transport_headers_policy::{HeaderCapturePolicy, HeaderPropagationPolicy};
-use crate::{CapabilityId, Description, NodeId, NodeUrn, PortName};
+use crate::{CapabilityId, Description, ExtensionId, NodeUrn, PortName};
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
@@ -25,7 +25,7 @@ use std::collections::HashMap;
 /// and returns an error so the user gets immediate feedback.
 fn deserialize_no_dup_keys<'de, D>(
     deserializer: D,
-) -> Result<HashMap<CapabilityId, NodeId>, D::Error>
+) -> Result<HashMap<CapabilityId, ExtensionId>, D::Error>
 where
     D: serde::Deserializer<'de>,
 {
@@ -35,7 +35,7 @@ where
     struct NoDupVisitor;

     impl<'de> Visitor<'de> for NoDupVisitor {
-        type Value = HashMap<CapabilityId, NodeId>;
+        type Value = HashMap<CapabilityId, ExtensionId>;

         fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
             f.write_str("a map with no duplicate keys")
@@ -49,7 +49,7 @@ where
                     "duplicate capability key '{key}'"
                 )));
             }
-            let _ = result.insert(CapabilityId::from(key), NodeId::from(value));
+            let _ = result.insert(CapabilityId::from(key), ExtensionId::from(value));
         }
         Ok(result)
     }
@@ -115,7 +115,7 @@ pub struct NodeUserConfig {
         skip_serializing_if = "HashMap::is_empty",
         deserialize_with = "deserialize_no_dup_keys"
     )]
-    pub capabilities: HashMap<CapabilityId, NodeId>,
+    pub capabilities: HashMap<CapabilityId, ExtensionId>,

     /// Entity configuration for the node.
     ///

rust/otap-dataflow/crates/core-nodes/src/processors/attributes_processor/mod.rs

Lines changed: 65 additions & 0 deletions
@@ -716,6 +716,71 @@
             })
             .validate(|_| async move {});
     }
+    #[test]
+    fn test_rename_removes_duplicate_keys() {
+        // Prepare input with key "a" and "b"
+        let input = build_logs_with_attrs(
+            vec![],
+            vec![],
+            vec![
+                KeyValue::new("a", AnyValue::new_string("value_a")),
+                KeyValue::new("b", AnyValue::new_string("value_b")),
+            ],
+        );
+
+        let cfg = json!({
+            "actions": [
+                {"action": "rename", "source_key": "a", "destination_key": "b"}
+            ]
+        });
+
+        let telemetry_registry_handle = TelemetryRegistryHandle::new();
+        let controller_ctx = ControllerContext::new(telemetry_registry_handle);
+        let pipeline_ctx =
+            controller_ctx.pipeline_context_with("grp".into(), "pipeline".into(), 0, 1, 0);
+
+        let node = test_node("attributes-processor-test-dup");
+        let rt: TestRuntime<OtapPdata> = TestRuntime::new();
+        let mut node_config = NodeUserConfig::new_processor_config(ATTRIBUTES_PROCESSOR_URN);
+        node_config.config = cfg;
+        let proc =
+            create_attributes_processor(pipeline_ctx, node, Arc::new(node_config), rt.config())
+                .expect("create processor");
+        let phase = rt.set_processor(proc);
+
+        phase
+            .run_test(|mut ctx| async move {
+                let mut bytes = BytesMut::new();
+                input.encode(&mut bytes).expect("encode");
+                let bytes = bytes.freeze();
+                let pdata_in =
+                    OtapPdata::new_default(OtlpProtoBytes::ExportLogsRequest(bytes).into());
+                ctx.process(Message::PData(pdata_in))
+                    .await
+                    .expect("process");
+
+                let out = ctx.drain_pdata().await;
+                let first = out.into_iter().next().expect("one output").payload();
+
+                let otlp_bytes: OtlpProtoBytes = first.try_into().expect("convert to otlp");
+                let bytes = match otlp_bytes {
+                    OtlpProtoBytes::ExportLogsRequest(b) => b,
+                    _ => panic!("unexpected otlp variant"),
+                };
+                let decoded = ExportLogsServiceRequest::decode(bytes.as_ref()).expect("decode");
+
+                let log_attrs = &decoded.resource_logs[0].scope_logs[0].log_records[0].attributes;
+
+                // Expect no "a" and exactly one "b"
+                assert!(!log_attrs.iter().any(|kv| kv.key == "a"));
+                let b_count = log_attrs.iter().filter(|kv| kv.key == "b").count();
+                assert_eq!(
+                    b_count, 1,
+                    "There should be exactly one key 'b' (no duplicates)"
+                );
+            })
+            .validate(|_| async move {});
+    }

     #[test]
     fn test_delete_applies_to_signal_only_by_default() {

0 commit comments
