Skip to content

Commit 971b286

Browse files
feat(nimbus): filter YAML export results_data to overall significant metrics (#14952)
Because
* The raw results_data JSON includes weekly/daily window data and neutral (non-significant) metrics, producing far more data than needed
* Consumers of the YAML export (e.g., the chatbot assistant) only need to know which metrics an experiment significantly shifted

This commit
* Changes results_data from a raw field to a SerializerMethodField on NimbusExperimentYamlSerializer
* Filters to only include the overall analysis window (excludes weekly/daily)
* Filters to only include metrics with at least one significant (positive or negative) branch comparison, excluding neutral metrics
* Preserves other_metrics metadata for context
* Returns None (omitted from YAML) when no significant results exist
* Adds tests for filtering, null/missing data, and all-neutral edge cases

Fixes #14951

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 158eb0d commit 971b286

4 files changed

Lines changed: 322 additions & 11 deletions

File tree

docs/experimenter/openapi-schema.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1471,8 +1471,8 @@
14711471
"readOnly": true
14721472
},
14731473
"results_data": {
1474-
"type": "object",
1475-
"nullable": true
1474+
"type": "string",
1475+
"readOnly": true
14761476
}
14771477
},
14781478
"required": [

docs/experimenter/swagger-ui.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1483,8 +1483,8 @@
14831483
"readOnly": true
14841484
},
14851485
"results_data": {
1486-
"type": "object",
1487-
"nullable": true
1486+
"type": "string",
1487+
"readOnly": true
14881488
}
14891489
},
14901490
"required": [

experimenter/experimenter/experiments/api/v5/serializers.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,7 @@ class NimbusExperimentYamlSerializer(serializers.ModelSerializer):
275275
excluded_experiments = serializers.SerializerMethodField()
276276
application_display = serializers.SerializerMethodField()
277277
parent_experiment = serializers.SerializerMethodField()
278+
results_data = serializers.SerializerMethodField()
278279

279280
class Meta:
280281
model = NimbusExperiment
@@ -458,6 +459,65 @@ def get_parent_experiment(self, obj):
458459
return f"{obj.parent.name} ({obj.parent.slug})"
459460
return None
460461

462+
def get_results_data(self, obj):
    """Return ``obj.results_data`` reduced to significant overall metrics.

    The stored payload is walked as
    ``results_data["v3"]["overall"][basis][segment][branch]["branch_data"][group][metric]``.
    Only the ``overall`` window is read, so any other key under ``v3``
    (for example weekly or daily windows) is dropped.  A metric is kept
    when ``self._metric_is_significant`` accepts it; groups, branches,
    segments and bases left without any metric are pruned.  The
    ``v3["other_metrics"]`` entry is copied through unchanged when present.

    Returns:
        ``{"v3": {"overall": ..., ["other_metrics": ...]}}``, or ``None``
        when the payload is missing, empty, or contains no significant
        metric.

    NOTE(review): the regenerated OpenAPI schema in this change lists this
    field as ``"type": "string"`` although a mapping or ``None`` is
    returned here -- confirm whether the schema needs an explicit override.
    """

    def as_mapping(node):
        # results_data is free-form stored JSON.  Treating any node that is
        # not a mapping (None, numbers, lists, legacy shapes) as empty keeps
        # one malformed experiment from raising AttributeError and aborting
        # the whole export.
        return node if isinstance(node, dict) else {}

    v3 = as_mapping(as_mapping(obj.results_data).get("v3"))
    overall = as_mapping(v3.get("overall"))

    filtered_overall = {}
    for basis, segments in overall.items():
        filtered_segments = {}
        for segment, branches in as_mapping(segments).items():
            filtered_branches = {}
            for branch, branch_data_wrapper in as_mapping(branches).items():
                wrapper = as_mapping(branch_data_wrapper)
                branch_data = as_mapping(wrapper.get("branch_data"))

                filtered_groups = {}
                for group, metrics in branch_data.items():
                    filtered_metrics = {
                        metric: metric_data
                        for metric, metric_data in as_mapping(metrics).items()
                        # Guard here as well so this method does not depend
                        # on the predicate tolerating non-mapping input.
                        if isinstance(metric_data, dict)
                        and self._metric_is_significant(metric_data)
                    }
                    if filtered_metrics:
                        filtered_groups[group] = filtered_metrics

                if filtered_groups:
                    filtered_branches[branch] = {
                        "branch_data": filtered_groups,
                        "is_control": wrapper.get("is_control", False),
                    }

            if filtered_branches:
                filtered_segments[segment] = filtered_branches

        if filtered_segments:
            filtered_overall[basis] = filtered_segments

    if not filtered_overall:
        return None

    result = {"v3": {"overall": filtered_overall}}
    if "other_metrics" in v3:
        result["v3"]["other_metrics"] = v3["other_metrics"]
    return result
510+
511+
@staticmethod
def _metric_is_significant(metric_data):
    """Return True when any branch comparison of the metric is significant.

    ``metric_data["significance"]`` maps a branch name to a mapping whose
    ``"overall"`` entry maps a key to a verdict string.  The metric counts
    as significant when at least one verdict is ``"positive"`` or
    ``"negative"``; any other verdict (for example ``"neutral"``) does not.

    Missing, null, or otherwise non-mapping nodes at any level are treated
    as "no verdict" rather than raising, because the payload is stored JSON
    whose shape is not enforced here.
    """
    if not isinstance(metric_data, dict):
        return False

    significance = metric_data.get("significance")
    if not isinstance(significance, dict):
        return False

    for branch_sig in significance.values():
        if not isinstance(branch_sig, dict):
            continue
        overall_sig = branch_sig.get("overall")
        if not isinstance(overall_sig, dict):
            continue
        if any(value in ("positive", "negative") for value in overall_sig.values()):
            return True
    return False
520+
461521

462522
class NimbusBranchScreenshotReviewSerializer(NimbusBranchScreenshotSerializer):
463523
# Round-trip serialization & validation for review can use a string path

experimenter/experimenter/experiments/tests/api/v5/test_views.py

Lines changed: 258 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -258,8 +258,33 @@ def test_yaml_contains_all_fields(self):
258258
results_data={
259259
"v3": {
260260
"overall": {
261-
"enrollments": {"all": {"percentage": 100.0, "population": 1000}}
262-
}
261+
"enrollments": {
262+
"all": {
263+
"control": {
264+
"branch_data": {
265+
"search_metrics": {
266+
"search_count": {
267+
"absolute": {
268+
"all": [{"point": 10.0}],
269+
"first": {},
270+
},
271+
"difference": {},
272+
"relative_uplift": {},
273+
"significance": {
274+
"control": {"overall": {}},
275+
"treatment": {
276+
"overall": {"1": "positive"}
277+
},
278+
},
279+
}
280+
}
281+
},
282+
"is_control": True,
283+
}
284+
}
285+
}
286+
},
287+
"weekly": {"enrollments": {"all": {"control": {"branch_data": {}}}}},
263288
}
264289
},
265290
)
@@ -323,11 +348,14 @@ def test_yaml_contains_all_fields(self):
323348
slugs = [fc["slug"] for fc in exp["feature_configs"]]
324349
self.assertIn("test-feature", slugs)
325350

326-
# Results data
327-
self.assertEqual(
328-
exp["results_data"]["v3"]["overall"]["enrollments"]["all"]["population"],
329-
1000,
330-
)
351+
# Results data: only overall with significant metrics, weekly excluded
352+
rd = exp["results_data"]
353+
self.assertIn("overall", rd["v3"])
354+
self.assertNotIn("weekly", rd["v3"])
355+
search = rd["v3"]["overall"]["enrollments"]["all"]["control"]["branch_data"][
356+
"search_metrics"
357+
]["search_count"]
358+
self.assertEqual(search["significance"]["treatment"]["overall"]["1"], "positive")
331359

332360
def test_default_hypothesis_excluded(self):
333361
application = NimbusExperiment.Application.DESKTOP
@@ -456,6 +484,229 @@ def test_pagination_multiple_pages(self):
456484
finally:
457485
YamlExportPagination.page_size = original_page_size
458486

487+
def test_results_data_filters_to_overall_significant_only(self):
    """The export keeps only the overall window and its significant metrics."""

    def significance(direction):
        # Control carries no verdict; the treatment comparison carries one.
        return {
            "control": {"overall": {}},
            "treatment": {"overall": {"1": direction}},
        }

    def bare_metric(point, direction):
        # Metric with a single absolute point and no difference/uplift data.
        return {
            "absolute": {"all": [{"point": point}], "first": {}},
            "difference": {},
            "relative_uplift": {},
            "significance": significance(direction),
        }

    application = NimbusExperiment.Application.DESKTOP
    feature_config = NimbusFeatureConfigFactory.create(application=application)

    search_count = {
        "absolute": {
            "all": [{"point": 10.0, "lower": 9.5, "upper": 10.5}],
            "first": {},
        },
        "difference": {
            "treatment": {"all": [{"point": 0.5, "lower": 0.1, "upper": 0.9}]}
        },
        "relative_uplift": {
            "treatment": {"all": [{"point": 0.05, "lower": 0.01, "upper": 0.09}]}
        },
        "significance": significance("positive"),
    }
    overall_control = {
        "branch_data": {
            "search_metrics": {"search_count": search_count},
            "other_metrics": {
                "neutral_metric": bare_metric(5.0, "neutral"),
                "negative_metric": bare_metric(3.0, "negative"),
            },
        },
        "is_control": True,
    }
    weekly_control = {
        "branch_data": {
            "search_metrics": {
                "search_count": {
                    "absolute": {
                        "all": [
                            {"point": 1.0, "window_index": "1"},
                            {"point": 2.0, "window_index": "2"},
                        ],
                        "first": {},
                    },
                    "significance": {},
                }
            }
        },
        "is_control": True,
    }
    results_data = {
        "v3": {
            "overall": {"enrollments": {"all": {"control": overall_control}}},
            "weekly": {"enrollments": {"all": {"control": weekly_control}}},
            "daily": {"enrollments": {"all": {}}},
            "other_metrics": {
                "other_metrics": {
                    "neutral_metric": "Neutral",
                    "negative_metric": "Negative",
                }
            },
        }
    }

    NimbusExperimentFactory.create_with_lifecycle(
        NimbusExperimentFactory.Lifecycles.ENDING_APPROVE_APPROVE,
        name="Results Filter Test",
        slug="results-filter-test",
        application=application,
        feature_configs=[feature_config],
        results_data=results_data,
    )

    exported = self._get_yaml()
    experiment = next(e for e in exported if e["slug"] == "results-filter-test")
    v3 = experiment["results_data"]["v3"]

    # Windows other than "overall" must not be exported.
    self.assertIn("overall", v3)
    self.assertNotIn("weekly", v3)
    self.assertNotIn("daily", v3)

    branch_data = v3["overall"]["enrollments"]["all"]["control"]["branch_data"]

    # Positive and negative metrics survive the filter.
    self.assertIn("search_count", branch_data["search_metrics"])
    self.assertIn("negative_metric", branch_data["other_metrics"])

    # The neutral metric is removed.
    self.assertNotIn("neutral_metric", branch_data["other_metrics"])

    # The other_metrics metadata entry is carried over.
    self.assertIn("other_metrics", v3)
640+
def test_results_data_empty_or_missing_returns_none(self):
    """Null, missing v3, or missing overall return no results_data."""
    application = NimbusExperiment.Application.DESKTOP
    feature_config = NimbusFeatureConfigFactory.create(application=application)

    # slug -> stored results_data that should yield no exported field.
    cases = {
        "no-results": None,
        "no-v3": {"other_key": {}},
        "no-overall": {"v3": {"weekly": {}}},
    }
    for slug, results_data in cases.items():
        NimbusExperimentFactory.create_with_lifecycle(
            NimbusExperimentFactory.Lifecycles.ENDING_APPROVE_APPROVE,
            name=slug,
            slug=slug,
            application=application,
            feature_configs=[feature_config],
            results_data=results_data,
        )

    exported_by_slug = {exp["slug"]: exp for exp in self._get_yaml()}
    for slug in cases:
        # subTest reports each case separately, so one failing payload
        # neither hides the others nor leaves the failing case unnamed.
        with self.subTest(slug=slug):
            self.assertNotIn("results_data", exported_by_slug[slug])
663+
664+
def test_results_data_all_neutral_returns_none(self):
    """An experiment whose metrics are all neutral exports no results_data."""
    application = NimbusExperiment.Application.DESKTOP
    feature_config = NimbusFeatureConfigFactory.create(application=application)

    neutral_metric = {
        "absolute": {
            "all": [{"point": 5.0}],
            "first": {},
        },
        "significance": {
            "control": {"overall": {}},
            "treatment": {"overall": {"1": "neutral"}},
        },
    }
    control_branch = {
        "branch_data": {"other_metrics": {"some_metric": neutral_metric}},
        "is_control": True,
    }
    results_data = {
        "v3": {"overall": {"enrollments": {"all": {"control": control_branch}}}}
    }

    NimbusExperimentFactory.create_with_lifecycle(
        NimbusExperimentFactory.Lifecycles.ENDING_APPROVE_APPROVE,
        name="All Neutral Experiment",
        slug="all-neutral-experiment",
        application=application,
        feature_configs=[feature_config],
        results_data=results_data,
    )

    exported = self._get_yaml()
    experiment = next(e for e in exported if e["slug"] == "all-neutral-experiment")
    # With nothing significant the field is absent from the exported
    # document (per the original note, removed by _strip_empty).
    self.assertNotIn("results_data", experiment)
709+
459710

460711
class TestFmlErrorsView(MockFmlErrorMixin, TestCase):
461712
def test_returns_fml_errors(self):

0 commit comments

Comments
 (0)