feat(nimbus): filter YAML export results_data to overall significant metrics (#14952)

jaredlockhart · claude · web-flow · commit 971b286319a3 · 2026-03-17T23:02:56.000Z
Because

* The raw results_data JSON includes weekly/daily window data and neutral (non-significant) metrics, producing far more data than needed
* Consumers of the YAML export (e.g., chatbot assistant) only need to know which metrics an experiment significantly shifted

This commit

* Changes results_data from a raw field to a SerializerMethodField on NimbusExperimentYamlSerializer
* Filters to only include the overall analysis window (excludes weekly/daily)
* Filters to only include metrics with at least one significant (positive or negative) branch comparison, excluding neutral metrics
* Preserves other_metrics metadata for context
* Returns None (omitted from YAML) when no significant results exist
* Adds tests for filtering, null/missing data, and all-neutral edge cases

Fixes #14951

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/docs/experimenter/openapi-schema.json b/docs/experimenter/openapi-schema.json
@@ -1471,8 +1471,8 @@
             "readOnly": true
           },
           "results_data": {
-            "type": "object",
-            "nullable": true
+            "type": "string",
+            "readOnly": true
           }
         },
         "required": [
diff --git a/docs/experimenter/swagger-ui.html b/docs/experimenter/swagger-ui.html
@@ -1483,8 +1483,8 @@
             "readOnly": true
           },
           "results_data": {
-            "type": "object",
-            "nullable": true
+            "type": "string",
+            "readOnly": true
           }
         },
         "required": [
diff --git a/experimenter/experimenter/experiments/api/v5/serializers.py b/experimenter/experimenter/experiments/api/v5/serializers.py
@@ -275,6 +275,7 @@ class NimbusExperimentYamlSerializer(serializers.ModelSerializer):
     excluded_experiments = serializers.SerializerMethodField()
     application_display = serializers.SerializerMethodField()
     parent_experiment = serializers.SerializerMethodField()
+    results_data = serializers.SerializerMethodField()
 
     class Meta:
         model = NimbusExperiment
@@ -458,6 +459,65 @@ def get_parent_experiment(self, obj):
             return f"{obj.parent.name} ({obj.parent.slug})"
         return None
 
+    def get_results_data(self, obj):
+        """Return results_data reduced to overall-window significant metrics.
+
+        Keeps only ``v3.overall`` entries whose metrics pass
+        ``_metric_is_significant``, pruning every level left empty, and copies
+        ``v3.other_metrics`` through unchanged. Returns None when results are
+        missing or nothing significant remains.
+        """
+        v3 = obj.results_data.get("v3") if obj.results_data else None
+        overall = v3.get("overall") if v3 else None
+        if not overall:
+            return None
+
+        # Rebuild the basis -> segment -> branch -> group tree, dropping empty levels.
+        kept_overall = {}
+        for basis, segments in overall.items():
+            kept_segments = {}
+            for segment, branches in segments.items():
+                kept_branches = {}
+                for branch, wrapper in branches.items():
+                    kept_groups = {}
+                    for group, metrics in wrapper.get("branch_data", {}).items():
+                        significant = {
+                            name: data
+                            for name, data in metrics.items()
+                            if self._metric_is_significant(data)
+                        }
+                        if significant:
+                            kept_groups[group] = significant
+                    if not kept_groups:
+                        continue
+                    kept_branches[branch] = {
+                        "branch_data": kept_groups,
+                        "is_control": wrapper.get("is_control", False),
+                    }
+                if kept_branches:
+                    kept_segments[segment] = kept_branches
+            if kept_segments:
+                kept_overall[basis] = kept_segments
+
+        if not kept_overall:
+            return None
+
+        filtered_v3 = {"overall": kept_overall}
+        # Carry the other_metrics entry over untouched.
+        if "other_metrics" in v3:
+            filtered_v3["other_metrics"] = v3["other_metrics"]
+        return {"v3": filtered_v3}
+
+    @staticmethod
+    def _metric_is_significant(metric_data):
+        """Return True if any overall significance value is positive/negative."""
+        per_branch = metric_data.get("significance", {}).values()
+        return any(
+            outcome in ("positive", "negative")
+            for branch_sig in per_branch
+            for outcome in branch_sig.get("overall", {}).values()
+        )
+
 
 class NimbusBranchScreenshotReviewSerializer(NimbusBranchScreenshotSerializer):
     # Round-trip serialization & validation for review can use a string path
diff --git a/experimenter/experimenter/experiments/tests/api/v5/test_views.py b/experimenter/experimenter/experiments/tests/api/v5/test_views.py
@@ -258,8 +258,33 @@ def test_yaml_contains_all_fields(self):
             results_data={
                 "v3": {
                     "overall": {
-                        "enrollments": {"all": {"percentage": 100.0, "population": 1000}}
-                    }
+                        "enrollments": {
+                            "all": {
+                                "control": {
+                                    "branch_data": {
+                                        "search_metrics": {
+                                            "search_count": {
+                                                "absolute": {
+                                                    "all": [{"point": 10.0}],
+                                                    "first": {},
+                                                },
+                                                "difference": {},
+                                                "relative_uplift": {},
+                                                "significance": {
+                                                    "control": {"overall": {}},
+                                                    "treatment": {
+                                                        "overall": {"1": "positive"}
+                                                    },
+                                                },
+                                            }
+                                        }
+                                    },
+                                    "is_control": True,
+                                }
+                            }
+                        }
+                    },
+                    "weekly": {"enrollments": {"all": {"control": {"branch_data": {}}}}},
                 }
             },
         )
@@ -323,11 +348,14 @@ def test_yaml_contains_all_fields(self):
         slugs = [fc["slug"] for fc in exp["feature_configs"]]
         self.assertIn("test-feature", slugs)
 
-        # Results data
-        self.assertEqual(
-            exp["results_data"]["v3"]["overall"]["enrollments"]["all"]["population"],
-            1000,
-        )
+        # Results data: only overall with significant metrics, weekly excluded
+        rd = exp["results_data"]
+        self.assertIn("overall", rd["v3"])
+        self.assertNotIn("weekly", rd["v3"])
+        search = rd["v3"]["overall"]["enrollments"]["all"]["control"]["branch_data"][
+            "search_metrics"
+        ]["search_count"]
+        self.assertEqual(search["significance"]["treatment"]["overall"]["1"], "positive")
 
     def test_default_hypothesis_excluded(self):
         application = NimbusExperiment.Application.DESKTOP
@@ -456,6 +484,229 @@ def test_pagination_multiple_pages(self):
             finally:
                 YamlExportPagination.page_size = original_page_size
 
+    def test_results_data_filters_to_overall_significant_only(self):
+        """Export keeps only the overall window and metrics with a significant result."""
+        application = NimbusExperiment.Application.DESKTOP
+        feature_config = NimbusFeatureConfigFactory.create(application=application)
+
+        results_data = {
+            "v3": {
+                "overall": {
+                    "enrollments": {
+                        "all": {
+                            "control": {
+                                "branch_data": {
+                                    "search_metrics": {
+                                        "search_count": {
+                                            "absolute": {
+                                                "all": [
+                                                    {
+                                                        "point": 10.0,
+                                                        "lower": 9.5,
+                                                        "upper": 10.5,
+                                                    }
+                                                ],
+                                                "first": {},
+                                            },
+                                            "difference": {
+                                                "treatment": {
+                                                    "all": [
+                                                        {
+                                                            "point": 0.5,
+                                                            "lower": 0.1,
+                                                            "upper": 0.9,
+                                                        }
+                                                    ]
+                                                }
+                                            },
+                                            "relative_uplift": {
+                                                "treatment": {
+                                                    "all": [
+                                                        {
+                                                            "point": 0.05,
+                                                            "lower": 0.01,
+                                                            "upper": 0.09,
+                                                        }
+                                                    ]
+                                                }
+                                            },
+                                            "significance": {
+                                                "control": {"overall": {}},
+                                                "treatment": {
+                                                    "overall": {"1": "positive"}
+                                                },
+                                            },
+                                        }
+                                    },
+                                    "other_metrics": {
+                                        "neutral_metric": {
+                                            "absolute": {
+                                                "all": [{"point": 5.0}],
+                                                "first": {},
+                                            },
+                                            "difference": {},
+                                            "relative_uplift": {},
+                                            "significance": {
+                                                "control": {"overall": {}},
+                                                "treatment": {
+                                                    "overall": {"1": "neutral"}
+                                                },
+                                            },
+                                        },
+                                        "negative_metric": {
+                                            "absolute": {
+                                                "all": [{"point": 3.0}],
+                                                "first": {},
+                                            },
+                                            "difference": {},
+                                            "relative_uplift": {},
+                                            "significance": {
+                                                "control": {"overall": {}},
+                                                "treatment": {
+                                                    "overall": {"1": "negative"}
+                                                },
+                                            },
+                                        },
+                                    },
+                                },
+                                "is_control": True,
+                            }
+                        }
+                    }
+                },
+                "weekly": {
+                    "enrollments": {
+                        "all": {
+                            "control": {
+                                "branch_data": {
+                                    "search_metrics": {
+                                        "search_count": {
+                                            "absolute": {
+                                                "all": [
+                                                    {"point": 1.0, "window_index": "1"},
+                                                    {"point": 2.0, "window_index": "2"},
+                                                ],
+                                                "first": {},
+                                            },
+                                            "significance": {},
+                                        }
+                                    }
+                                },
+                                "is_control": True,
+                            }
+                        }
+                    }
+                },
+                "daily": {"enrollments": {"all": {}}},
+                "other_metrics": {
+                    "other_metrics": {
+                        "neutral_metric": "Neutral",
+                        "negative_metric": "Negative",
+                    }
+                },
+            }
+        }
+
+        NimbusExperimentFactory.create_with_lifecycle(
+            NimbusExperimentFactory.Lifecycles.ENDING_APPROVE_APPROVE,
+            name="Results Filter Test",
+            slug="results-filter-test",
+            application=application,
+            feature_configs=[feature_config],
+            results_data=results_data,
+        )
+
+        data = self._get_yaml()
+        exp = next(e for e in data if e["slug"] == "results-filter-test")
+        rd = exp["results_data"]
+
+        # The weekly and daily windows are dropped; only overall is exported
+        self.assertIn("overall", rd["v3"])
+        self.assertNotIn("weekly", rd["v3"])
+        self.assertNotIn("daily", rd["v3"])
+
+        branch = rd["v3"]["overall"]["enrollments"]["all"]["control"]
+
+        # Metrics with a positive or negative result are kept
+        self.assertIn("search_count", branch["branch_data"]["search_metrics"])
+        self.assertIn("negative_metric", branch["branch_data"]["other_metrics"])
+
+        # The neutral-only metric is filtered out of its group
+        self.assertNotIn("neutral_metric", branch["branch_data"]["other_metrics"])
+
+        # The v3-level other_metrics mapping is still present
+        self.assertIn("other_metrics", rd["v3"])
+
+    def test_results_data_empty_or_missing_returns_none(self):
+        """results_data is absent when it is null or lacks v3 / v3.overall."""
+        desktop = NimbusExperiment.Application.DESKTOP
+        feature = NimbusFeatureConfigFactory.create(application=desktop)
+        unusable_results = {
+            "no-results": None,
+            "no-v3": {"other_key": {}},
+            "no-overall": {"v3": {"weekly": {}}},
+        }
+        for slug, raw_results in unusable_results.items():
+            NimbusExperimentFactory.create_with_lifecycle(
+                NimbusExperimentFactory.Lifecycles.ENDING_APPROVE_APPROVE,
+                name=slug,
+                slug=slug,
+                application=desktop,
+                feature_configs=[feature],
+                results_data=raw_results,
+            )
+
+        exported = self._get_yaml()
+        for slug in unusable_results:
+            entry = next(e for e in exported if e["slug"] == slug)
+            self.assertNotIn("results_data", entry)
+
+    def test_results_data_all_neutral_returns_none(self):
+        """results_data is left out when every metric in it is neutral."""
+        desktop = NimbusExperiment.Application.DESKTOP
+        feature = NimbusFeatureConfigFactory.create(application=desktop)
+
+        # A single metric whose only comparison result is neutral.
+        neutral_metric = {
+            "absolute": {
+                "all": [{"point": 5.0}],
+                "first": {},
+            },
+            "significance": {
+                "control": {"overall": {}},
+                "treatment": {"overall": {"1": "neutral"}},
+            },
+        }
+
+        control_branch = {
+            "branch_data": {"other_metrics": {"some_metric": neutral_metric}},
+            "is_control": True,
+        }
+
+        results_data = {
+            "v3": {
+                "overall": {
+                    "enrollments": {
+                        "all": {"control": control_branch},
+                    }
+                }
+            }
+        }
+
+        NimbusExperimentFactory.create_with_lifecycle(
+            NimbusExperimentFactory.Lifecycles.ENDING_APPROVE_APPROVE,
+            name="All Neutral Experiment",
+            slug="all-neutral-experiment",
+            application=desktop,
+            feature_configs=[feature],
+            results_data=results_data,
+        )
+
+        exported = self._get_yaml()
+        entry = next(e for e in exported if e["slug"] == "all-neutral-experiment")
+        # All metrics are neutral, so results_data stripped by _strip_empty
+        self.assertNotIn("results_data", entry)
+
 
 class TestFmlErrorsView(MockFmlErrorMixin, TestCase):
     def test_returns_fml_errors(self):