diff --git a/jobs/webcompat-kb/tests/test_bugzilla.py b/jobs/webcompat-kb/tests/test_bugzilla.py
index 62935e34..a9901b48 100644
--- a/jobs/webcompat-kb/tests/test_bugzilla.py
+++ b/jobs/webcompat-kb/tests/test_bugzilla.py
@@ -1,30 +1,52 @@
+import tempfile
+from collections import defaultdict
 from datetime import datetime, timezone
 from unittest.mock import Mock, patch
-from typing import Any, Mapping
+from typing import Any, Iterable, Mapping
 
 import pytest
 
-from bugdantic.bugzilla import History
+import bugdantic.bugzilla
 
 from webcompat_kb.base import get_client
-from webcompat_kb.bugzilla import BugHistoryChange, BugHistoryEntry
-from webcompat_kb.bugzilla import BugzillaToBigQuery
-from webcompat_kb.bugzilla import extract_int_from_field
-from webcompat_kb.bugzilla import parse_string_to_json
-from webcompat_kb.bugzilla import RELATION_CONFIG, LINK_FIELDS, ETP_RELATION_CONFIG
+from webcompat_kb.bugzilla import (
+    Bug,
+    BugHistoryChange,
+    BugHistoryEntry,
+    BugHistoryUpdater,
+    EXTERNAL_LINK_CONFIGS,
+    PropertyHistory,
+    extract_int_from_field,
+    get_etp_breakage_reports,
+    get_kb_bug_core_bugs,
+    get_kb_bug_site_report,
+    group_bugs,
+    load_bugs,
+    parse_user_story,
+    write_bugs,
+)
+
+
+def to_bugs_by_id(data: Iterable[dict[str, Any]]) -> Mapping[int, Bug]:
+    return {bug_data["id"]: Bug.from_json(bug_data) for bug_data in data}
 
 
-def to_history(data: list[dict[str, Any]]) -> Mapping[int, list[History]]:
+def to_history(
+    data: list[dict[str, Any]],
+) -> Mapping[int, list[bugdantic.bugzilla.History]]:
     return {
-        item["id"]: [History.model_validate(entry) for entry in item["history"]]
+        item["id"]: [
+            bugdantic.bugzilla.History.model_validate(entry)
+            for entry in item["history"]
+        ]
         for item in data
    }
 
 
-def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]:
-    rv = []
+def to_history_entry(data: list[dict[str, Any]]) -> dict[int, list[BugHistoryEntry]]:
+    rv = defaultdict(list)
     for item in data:
         changes = [BugHistoryChange(**change) for change in item["changes"]]
-        rv.append(
+        rv[item["number"]].append(
             BugHistoryEntry(
                 number=item["number"],
                 who=item["who"],
@@ -35,10 +57,22 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]:
     return rv
 
 
-SAMPLE_BUGS = {
-    item["id"]: item
-    for item in [
+SAMPLE_KB_BUGS = to_bugs_by_id(
+    [
         {
+            "assigned_to": "test@example.org",
+            "blocks": [],
+            "component": "Knowledge Base",
+            "creation_time": "2000-07-25T13:50:04Z",
+            "creator": "nobody@mozilla.org",
+            "depends_on": [903746],
+            "id": 1835339,
+            "keywords": [],
+            "last_change_time": "2024-05-27T15:07:03Z",
+            "last_resolved": None,
+            "priority": None,
+            "product": "Web Compatibility",
+            "resolution": "",
             "see_also": [
                 "https://github.com/webcompat/web-bugs/issues/13503",
                 "https://github.com/webcompat/web-bugs/issues/91682",
@@ -47,34 +81,33 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]:
                 "https://bugzilla.mozilla.org/show_bug.cgi?id=1739489",
                 "https://bugzilla.mozilla.org/show_bug.cgi?id=1739791",
                 "https://github.com/webcompat/web-bugs/issues/109064",
-                "https://github.com/mozilla-extensions/webcompat-addon/blob/5b391018e847a1eb30eba4784c86acd1c638ed26/src/injections/js/bug1739489-draftjs-beforeinput.js",  # noqa
+                "https://github.com/mozilla-extensions/webcompat-addon/blob/5b391018e847a1eb30eba4784c86acd1c638ed26/src/injections/js/bug1739489-draftjs-beforeinput.js",
                 "https://github.com/webcompat/web-bugs/issues/112848",
                 "https://github.com/webcompat/web-bugs/issues/117039",
             ],
-            "cf_user_story": "url:cmcreg.bancosantander.es/*\r\nurl:new.reddit.com/*\r\nurl:web.whatsapp.com/*\r\nurl:facebook.com/*\r\nurl:twitter.com/*\r\nurl:reddit.com/*\r\nurl:mobilevikings.be/*\r\nurl:book.ersthelfer.tv/*",  # noqa
-            "severity": "--",
-            "priority": "--",
-            "depends_on": [903746],
-            "component": "Knowledge Base",
-            "product": "Web Compatibility",
-            "resolution": "",
+            "severity": None,
             "status": "NEW",
-            "blocks": [],
-            "id": 1835339,
             "summary": "Missing implementation of textinput event",
-            "assigned_to": "test@example.org",
-            "creator": "nobody@mozilla.org",
-            "creation_time": datetime.fromisoformat("2000-07-25T13:50:04Z"),
-            "keywords": [],
             "url": "",
+            "user_story": "url:cmcreg.bancosantander.es/*\r\nurl:new.reddit.com/*\r\nurl:web.whatsapp.com/*\r\nurl:facebook.com/*\r\nurl:twitter.com/*\r\nurl:reddit.com/*\r\nurl:mobilevikings.be/*\r\nurl:book.ersthelfer.tv/*",  # noqa
+            "webcompat_priority": None,
+            "webcompat_score": None,
             "whiteboard": "",
-            "cf_webcompat_priority": "---",
-            "cf_webcompat_score": "---",
         },
         {
+            "assigned_to": "nobody@mozilla.org",
+            "blocks": [],
             "component": "Knowledge Base",
-            "product": "Web Compatibility",
+            "creation_time": "2000-07-25T13:50:04Z",
+            "creator": "nobody@mozilla.org",
             "depends_on": [],
+            "id": 1835416,
+            "keywords": [],
+            "last_change_time": "2024-05-27T15:07:03Z",
+            "last_resolved": None,
+            "priority": None,
+            "product": "Web Compatibility",
+            "resolution": "",
             "see_also": [
                 "https://github.com/webcompat/web-bugs/issues/100260",
                 "https://github.com/webcompat/web-bugs/issues/22829",
@@ -86,27 +119,29 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]:
                 "https://github.com/webcompat/web-bugs/issues/122127",
                 "https://github.com/webcompat/web-bugs/issues/120886",
             ],
-            "summary": "Sites breaking due to the lack of WebUSB support",
-            "id": 1835416,
-            "blocks": [],
-            "resolution": "",
-            "priority": "--",
-            "severity": "--",
-            "cf_user_story": "url:webminidisc.com/*\r\nurl:app.webadb.com/*\r\nurl:www.numworks.com/*\r\nurl:webadb.github.io/*\r\nurl:www.stemplayer.com/*\r\nurl:wootility.io/*\r\nurl:python.microbit.org/*\r\nurl:flash.android.com/*",  # noqa
+            "severity": None,
             "status": "NEW",
-            "assigned_to": "nobody@mozilla.org",
-            "creator": "nobody@mozilla.org",
-            "creation_time": datetime.fromisoformat("2000-07-25T13:50:04Z"),
-            "keywords": [],
+            "summary": "Sites breaking due to the lack of WebUSB support",
             "url": "",
+            "user_story": "url:webminidisc.com/*\r\nurl:app.webadb.com/*\r\nurl:www.numworks.com/*\r\nurl:webadb.github.io/*\r\nurl:www.stemplayer.com/*\r\nurl:wootility.io/*\r\nurl:python.microbit.org/*\r\nurl:flash.android.com/*",  # noqa
+            "webcompat_priority": None,
+            "webcompat_score": None,
             "whiteboard": "",
-            "cf_webcompat_priority": "---",
-            "cf_webcompat_score": "---",
         },
         {
+            "assigned_to": "nobody@mozilla.org",
+            "blocks": [222222, 1734557],
             "component": "Knowledge Base",
-            "product": "Web Compatibility",
+            "creation_time": "2000-07-25T13:50:04Z",
+            "creator": "nobody@mozilla.org",
             "depends_on": [555555],
+            "id": 111111,
+            "keywords": [],
+            "last_change_time": "2024-05-27T15:07:03Z",
+            "last_resolved": None,
+            "priority": None,
+            "product": "Web Compatibility",
+            "resolution": "",
             "see_also": [
                 "https://crbug.com/606208",
                 "https://github.com/whatwg/html/issues/1896",
@@ -115,39 +150,34 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]:
                 "https://github.com/mozilla/standards-positions/issues/20",
                 "https://github.com/WebKit/standards-positions/issues/186",
             ],
-            "summary": "Test bug",
-            "id": 111111,
- "blocks": [222222, 1734557], - "resolution": "", - "priority": "--", - "severity": "--", - "cf_user_story": "", + "severity": None, "status": "NEW", - "assigned_to": "nobody@mozilla.org", - "creator": "nobody@mozilla.org", - "creation_time": datetime.fromisoformat("2000-07-25T13:50:04Z"), - "keywords": [], + "summary": "Test bug", "url": "", + "user_story": "", + "webcompat_priority": None, + "webcompat_score": None, "whiteboard": "", - "cf_webcompat_priority": "---", - "cf_webcompat_score": "---", }, ] -} +) -SAMPLE_CORE_BUGS = { - item["id"]: item - for item in [ +SAMPLE_CORE_BUGS = to_bugs_by_id( + [ { - "id": 903746, - "severity": "--", - "priority": "--", - "cf_user_story": "", - "depends_on": [], - "status": "UNCONFIRMED", - "product": "Core", + "assigned_to": "nobody@mozilla.org", "blocks": [1754236, 1835339], "component": "DOM: Events", + "creation_time": "2000-07-25T13:50:04Z", + "creator": "nobody@mozilla.org", + "depends_on": [], + "id": 903746, + "keywords": [], + "last_change_time": "2024-05-27T15:07:03Z", + "last_resolved": None, + "priority": None, + "product": "Core", + "resolution": "", "see_also": [ "https://bugzilla.mozilla.org/show_bug.cgi?id=1739489", "https://bugzilla.mozilla.org/show_bug.cgi?id=1739791", @@ -158,153 +188,204 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]: "https://github.com/webcompat/web-bugs/issues/117039", "https://github.com/w3c/uievents/issues/353", ], - "resolution": "", + "severity": None, + "status": "UNCONFIRMED", "summary": "Missing textinput event", - "assigned_to": "nobody@mozilla.org", + "url": "", + "user_story": "", + "webcompat_priority": None, + "webcompat_score": None, + "whiteboard": "", }, { - "id": 555555, - "severity": "--", - "priority": "--", - "cf_user_story": "", + "assigned_to": "nobody@mozilla.org", + "blocks": [111111], + "component": "Test", + "creation_time": "2000-07-25T13:50:04Z", + "creator": "nobody@mozilla.org", "depends_on": [], - "status": "UNCONFIRMED", + "id": 555555, + "keywords": [], + "last_change_time": "2024-05-27T15:07:03Z", + "last_resolved": None, + "priority": None, "product": "Core", - "blocks": [], - "component": "Test", - "see_also": ["https://mozilla.github.io/standards-positions/#testposition"], "resolution": "", + "see_also": ["https://mozilla.github.io/standards-positions/#testposition"], + "severity": None, + "status": "UNCONFIRMED", "summary": "Test Core bug", + "url": "", + "user_story": "", + "webcompat_priority": None, + "webcompat_score": None, + "whiteboard": "", + }, + { "assigned_to": "nobody@mozilla.org", + "blocks": [], + "component": "Test", + "creation_time": "2000-07-25T13:50:04Z", + "creator": "nobody@mozilla.org", + "depends_on": [], + "id": 999999, + "keywords": [], + "last_change_time": "2024-05-27T15:07:03Z", + "last_resolved": None, + "priority": None, + "product": "Core", + "resolution": "", + "see_also": [], + "severity": None, + "status": "NEW", + "summary": "Another Test Core bug", + "url": "", + "user_story": "", + "webcompat_priority": None, + "webcompat_score": None, + "whiteboard": "", }, ] -} +) -SAMPLE_BREAKAGE_BUGS = { - item["id"]: item - for item in [ +SAMPLE_BREAKAGE_BUGS = to_bugs_by_id( + [ { + "assigned_to": "nobody@mozilla.org", + "blocks": [], + "component": "Site Reports", + "creation_time": "2000-07-25T13:50:04Z", + "creator": "nobody@mozilla.org", + "depends_on": [111111], "id": 1734557, + "keywords": [], + "last_change_time": "2024-05-27T15:07:03Z", + "last_resolved": None, + "priority": None, "product": "Web 
Compatibility", - "cf_user_story": "url:angusnicneven.com/*", - "blocks": [], - "status": "ASSIGNED", - "summary": "Javascript causes infinite scroll because event.path is undefined", "resolution": "", - "depends_on": [111111], "see_also": [], - "component": "Desktop", - "severity": "--", - "priority": "--", - "assigned_to": "nobody@mozilla.org", - "cf_webcompat_priority": "---", - "cf_webcompat_score": "---", + "severity": None, + "status": "ASSIGNED", + "summary": "Javascript causes infinite scroll because event.path is undefined", + "url": "", + "user_story": "url:angusnicneven.com/*", + "webcompat_priority": None, + "webcompat_score": None, + "whiteboard": "", }, { + "assigned_to": "nobody@mozilla.org", + "blocks": [], + "component": "Site Reports", + "creation_time": "2000-07-25T13:50:04Z", + "creator": "nobody@mozilla.org", + "depends_on": [111111], "id": 222222, + "keywords": [], + "last_change_time": "2024-05-27T15:07:03Z", + "last_resolved": None, + "priority": None, "product": "Web Compatibility", - "cf_user_story": "url:example.com/*", - "blocks": [], - "status": "ASSIGNED", - "summary": "Test breakage bug", "resolution": "", - "depends_on": [111111], - "see_also": [], - "component": "Desktop", - "severity": "--", - "priority": "--", - "assigned_to": "nobody@mozilla.org", - "cf_webcompat_priority": "---", - "cf_webcompat_score": "---", - }, - { - "whiteboard": "", "see_also": [], - "severity": "S3", - "product": "Core", - "depends_on": [999999], - "summary": "Example core site report and platform bug", - "resolution": "", - "last_change_time": datetime.fromisoformat("2024-05-27T15:07:03Z"), - "keywords": ["webcompat:platform-bug", "webcompat:site-report"], - "priority": "P3", - "creation_time": datetime.fromisoformat("2024-03-21T16:40:27Z"), - "cf_user_story": "", - "status": "NEW", - "blocks": [], + "severity": None, + "status": "ASSIGNED", + "summary": "Test breakage bug", "url": "", - "cf_last_resolved": None, - "component": "JavaScript Engine", - "id": 444444, - "assigned_to": "nobody@mozilla.org", - "cf_webcompat_priority": "P3", - "cf_webcompat_score": "2", + "user_story": "url:example.com/*", + "webcompat_priority": None, + "webcompat_score": None, + "whiteboard": "", }, ] -} +) -SAMPLE_ETP_BUGS = { - item["id"]: item - for item in [ +SAMPLE_ETP_BUGS = to_bugs_by_id( + [ { - "url": "https://gothamist.com/", - "summary": "gothamist.com - The comments are not displayed with ETP set to Strict", + "assigned_to": "nobody@mozilla.org", + "blocks": [1101005], + "component": "Privacy: Site Reports", + "creation_time": "2024-07-30T07:37:28Z", + "creator": "nobody@mozilla.org", + "depends_on": [1875061], "id": 1910548, "keywords": ["priv-webcompat", "webcompat:site-report"], - "component": "Privacy: Site Reports", + "last_change_time": "2024-05-27T15:07:03Z", + "last_resolved": None, + "priority": None, + "product": "Web Compatibility", "resolution": "", - "blocks": [1101005], - "depends_on": [1875061], - "creation_time": datetime.fromisoformat("2024-07-30T07:37:28Z"), "see_also": ["https://github.com/webcompat/web-bugs/issues/139647"], - "product": "Web Compatibility", + "severity": None, "status": "NEW", - "cf_webcompat_priority": "---", - "cf_webcompat_score": "---", + "summary": "gothamist.com - The comments are not displayed with ETP set to Strict", + "url": "https://gothamist.com/", + "user_story": "", + "webcompat_priority": None, + "webcompat_score": None, + "whiteboard": "", }, { - "see_also": ["https://github.com/webcompat/web-bugs/issues/142250"], + "assigned_to": 
"nobody@mozilla.org", + "blocks": [], + "component": "Privacy: Site Reports", + "creation_time": "2024-10-01T08:50:58Z", + "creator": "nobody@mozilla.org", + "depends_on": [1101005, 1797458], "id": 1921943, - "summary": "my.farys.be - Login option is missing with ETP set to STRICT", - "product": "Web Compatibility", "keywords": [ "priv-webcompat", "webcompat:platform-bug", "webcompat:site-report", ], - "status": "NEW", + "last_change_time": "2024-05-27T15:07:03Z", + "last_resolved": None, + "priority": None, + "product": "Web Compatibility", "resolution": "", - "component": "Privacy: Site Reports", - "blocks": [], - "depends_on": [1101005, 1797458], - "creation_time": datetime.fromisoformat("2024-10-01T08:50:58Z"), + "see_also": ["https://github.com/webcompat/web-bugs/issues/142250"], + "severity": None, + "status": "NEW", + "summary": "my.farys.be - Login option is missing with ETP set to STRICT", "url": "https://my.farys.be/myfarys/", - "cf_webcompat_priority": "---", - "cf_webcompat_score": "---", + "user_story": "", + "webcompat_priority": None, + "webcompat_score": None, + "whiteboard": "", }, { - "see_also": [], - "summary": "ryanair.com - The form to start a chat does not load with ETP set to STRICT", - "id": 1928102, - "product": "Web Compatibility", - "status": "NEW", - "keywords": ["webcompat:site-report"], + "assigned_to": "nobody@mozilla.org", "blocks": [], "component": "Privacy: Site Reports", - "resolution": "", + "creation_time": "2024-10-30T15:04:41Z", + "creator": "nobody@mozilla.org", "depends_on": [1101005, 1122334], + "id": 1928102, + "keywords": ["webcompat:site-report"], + "last_change_time": "2024-05-27T15:07:03Z", + "last_resolved": None, + "priority": None, + "product": "Web Compatibility", + "resolution": "", + "see_also": [], + "severity": None, + "status": "NEW", + "summary": "ryanair.com - The form to start a chat does not load with ETP set to STRICT", "url": "https://www.ryanair.com/gb/en/lp/chat", - "creation_time": datetime.fromisoformat("2024-10-30T15:04:41Z"), - "cf_webcompat_priority": "---", - "cf_webcompat_score": "---", + "user_story": "", + "webcompat_priority": None, + "webcompat_score": None, + "whiteboard": "", }, ] -} +) -SAMPLE_ETP_DEPENDENCIES_BUGS = { - item["id"]: item - for item in [ +SAMPLE_ETP_DEPENDENCIES_BUGS = to_bugs_by_id( + [ { + "assigned_to": "nobody@mozilla.org", "blocks": [ 1526695, 1903311, @@ -312,8 +393,25 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]: 1903340, 1903345, ], + "component": "Privacy: Anti-Tracking", + "creation_time": "2014-11-18T16:11:29Z", + "creator": "nobody@mozilla.org", + "depends_on": [ + 1400025, + 1446243, + 1465962, + 1470298, + 1470301, + 1486425, + 1627322, + ], + "id": 1101005, + "keywords": ["meta", "webcompat:platform-bug"], + "last_change_time": "2024-05-27T15:07:03Z", + "last_resolved": None, + "priority": None, + "product": "Core", "resolution": "", - "status": "NEW", "see_also": [ "https://webcompat.com/issues/2999", "https://webcompat.com/issues/10020", @@ -339,35 +437,21 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]: "https://webcompat.com/issues/38315", "https://webcompat.com/issues/35647", ], - "creation_time": datetime.fromisoformat("2014-11-18T16:11:29Z"), + "severity": None, + "status": "NEW", "summary": "[meta] ETP Strict mode or Private Browsing mode tracking protection breakage", "url": "", - "id": 1101005, - "component": "Privacy: Anti-Tracking", - "depends_on": [ - 1400025, - 1446243, - 1465962, - 1470298, - 1470301, - 
1486425, - 1627322, - ], - "keywords": ["meta", "webcompat:platform-bug"], - "product": "Core", + "user_story": "", + "webcompat_priority": None, + "webcompat_score": None, + "whiteboard": "", }, { - "status": "NEW", - "resolution": "", + "assigned_to": "nobody@mozilla.org", "blocks": [1101005, 1773684, 1921943], - "summary": "[meta] Email Tracking Breakage", - "creation_time": datetime.fromisoformat("2022-10-26T09:33:25Z"), - "see_also": [], "component": "Privacy: Anti-Tracking", - "url": "", - "id": 1797458, - "product": "Core", - "keywords": ["meta"], + "creation_time": "2022-10-26T09:33:25Z", + "creator": "nobody@mozilla.org", "depends_on": [ 1796560, 1799094, @@ -375,23 +459,25 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]: 1800007, 1803127, ], - }, - { - "status": "NEW", - "resolution": "", - "creation_time": datetime.fromisoformat("2024-01-17T13:40:16Z"), - "see_also": [ - "https://bugzilla.mozilla.org/show_bug.cgi?id=1869326", - "https://bugzilla.mozilla.org/show_bug.cgi?id=1872855", - "https://bugzilla.mozilla.org/show_bug.cgi?id=1874855", - "https://bugzilla.mozilla.org/show_bug.cgi?id=1878855", - "https://bugzilla.mozilla.org/show_bug.cgi?id=1428122", - "https://bugzilla.mozilla.org/show_bug.cgi?id=1892176", - ], - "url": "", + "id": 1797458, "keywords": ["meta"], + "last_change_time": "2024-05-27T15:07:03Z", + "last_resolved": None, + "priority": None, "product": "Core", - "depends_on": [1884676, 1906418, 1894615], + "resolution": "", + "see_also": [], + "severity": None, + "status": "NEW", + "summary": "[meta] Email Tracking Breakage", + "url": "", + "user_story": "", + "webcompat_priority": None, + "webcompat_score": None, + "whiteboard": "", + }, + { + "assigned_to": "nobody@mozilla.org", "blocks": [ 1101005, 1901474, @@ -403,95 +489,145 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]: 1916183, 1916443, ], - "summary": "[meta] ETP breakage for webpages that have Disqus comment section", "component": "Privacy: Anti-Tracking", + "creation_time": "2024-01-17T13:40:16Z", + "creator": "nobody@mozilla.org", + "depends_on": [1884676, 1906418, 1894615], "id": 1875061, - }, - { - "status": "NEW", + "keywords": ["meta"], + "last_change_time": "2024-05-27T15:07:03Z", + "last_resolved": None, + "priority": None, + "product": "Core", "resolution": "", - "creation_time": datetime.fromisoformat("2024-01-17T13:40:16Z"), - "see_also": [], + "see_also": [ + "https://bugzilla.mozilla.org/show_bug.cgi?id=1869326", + "https://bugzilla.mozilla.org/show_bug.cgi?id=1872855", + "https://bugzilla.mozilla.org/show_bug.cgi?id=1874855", + "https://bugzilla.mozilla.org/show_bug.cgi?id=1878855", + "https://bugzilla.mozilla.org/show_bug.cgi?id=1428122", + "https://bugzilla.mozilla.org/show_bug.cgi?id=1892176", + ], + "severity": None, + "status": "NEW", + "summary": "[meta] ETP breakage for webpages that have Disqus comment section", "url": "", - "keywords": [], - "product": "Core", - "depends_on": [444444, 555555], + "user_story": "", + "webcompat_priority": None, + "webcompat_score": None, + "whiteboard": "", + }, + { + "assigned_to": "nobody@mozilla.org", "blocks": [], - "summary": "Sample non meta ETP dependency", "component": "Privacy: Anti-Tracking", + "creation_time": "2024-01-17T13:40:16Z", + "creator": "nobody@mozilla.org", + "depends_on": [444444, 555555], "id": 1122334, + "keywords": [], + "last_change_time": "2024-05-27T15:07:03Z", + "last_resolved": None, + "priority": None, + "product": "Core", + "resolution": "", + "see_also": [], + 
"severity": None, + "status": "NEW", + "summary": "Sample non meta ETP dependency", + "url": "", + "user_story": "", + "webcompat_priority": None, + "webcompat_score": None, + "whiteboard": "", }, ] -} +) -SAMPLE_CORE_AS_KB_BUGS = { - item["id"]: item - for item in [ +SAMPLE_CORE_AS_KB_BUGS = to_bugs_by_id( + [ { - "whiteboard": "", - "see_also": ["https://bugzilla.mozilla.org/show_bug.cgi?id=1740472"], - "severity": "S3", - "product": "Core", + "assigned_to": "nobody@mozilla.org", + "blocks": [1539848, 1729514, 1896383], + "component": "JavaScript Engine", + "creation_time": "2024-03-21T16:40:27Z", + "creator": "nobody@mozilla.org", "depends_on": [], - "summary": "Consider adding support for Error.captureStackTrace", - "resolution": "", - "last_change_time": datetime.fromisoformat("2024-05-27T15:07:03Z"), + "id": 1886820, "keywords": ["parity-chrome", "parity-safari", "webcompat:platform-bug"], - "priority": "P3", - "creation_time": datetime.fromisoformat("2024-03-21T16:40:27Z"), - "cf_user_story": "", + "last_change_time": "2024-05-27T15:07:03Z", + "last_resolved": None, + "priority": 3, + "product": "Core", + "resolution": "", + "see_also": ["https://bugzilla.mozilla.org/show_bug.cgi?id=1740472"], + "severity": 3, "status": "NEW", - "blocks": [1539848, 1729514, 1896383], + "summary": "Consider adding support for Error.captureStackTrace", "url": "", - "cf_last_resolved": None, - "component": "JavaScript Engine", - "id": 1886820, - "assigned_to": "nobody@mozilla.org", + "user_story": "", + "webcompat_priority": None, + "webcompat_score": None, + "whiteboard": "", }, { + "assigned_to": "nobody@mozilla.org", + "blocks": [1656444, 1835339, 222222], + "component": "DOM: Window and Location", + "creation_time": "2024-04-30T14:04:23Z", + "creator": "nobody@mozilla.org", "depends_on": [1896672], + "id": 1894244, + "keywords": ["webcompat:platform-bug"], + "last_change_time": "2024-05-14T15:19:21Z", + "last_resolved": None, + "priority": 3, "product": "Core", - "severity": "S2", - "see_also": ["https://bugzilla.mozilla.org/show_bug.cgi?id=1863217"], - "whiteboard": "", "resolution": "", - "summary": "Popup blocker is too strict when opening new windows", + "see_also": ["https://bugzilla.mozilla.org/show_bug.cgi?id=1863217"], + "severity": 2, "status": "NEW", - "cf_user_story": "", - "priority": "P3", - "creation_time": datetime.fromisoformat("2024-04-30T14:04:23Z"), - "keywords": ["webcompat:platform-bug"], - "last_change_time": datetime.fromisoformat("2024-05-14T15:19:21Z"), - "id": 1894244, - "component": "DOM: Window and Location", - "cf_last_resolved": None, + "summary": "Popup blocker is too strict when opening new windows", "url": "", - "blocks": [1656444, 1835339, 222222], - "assigned_to": "nobody@mozilla.org", + "user_story": "", + "webcompat_priority": None, + "webcompat_score": None, + "whiteboard": "", }, { - "whiteboard": "", - "see_also": [], - "severity": "S3", - "product": "Core", + "assigned_to": "nobody@mozilla.org", + "blocks": [], + "component": "JavaScript Engine", + "creation_time": "2024-03-21T16:40:27Z", + "creator": "nobody@mozilla.org", "depends_on": [999999], - "summary": "Example core site report and platform bug", - "resolution": "", - "last_change_time": datetime.fromisoformat("2024-05-27T15:07:03Z"), + "id": 444444, "keywords": ["webcompat:platform-bug", "webcompat:site-report"], - "priority": "P3", - "creation_time": datetime.fromisoformat("2024-03-21T16:40:27Z"), - "cf_user_story": "", + "last_change_time": "2024-05-27T15:07:03Z", + "last_resolved": None, + 
"priority": 3, + "product": "Core", + "resolution": "", + "see_also": [], + "severity": 3, "status": "NEW", - "blocks": [], + "summary": "Example core site report and platform bug", "url": "", - "cf_last_resolved": None, - "component": "JavaScript Engine", - "id": 444444, - "assigned_to": "nobody@mozilla.org", + "user_story": "", + "webcompat_priority": None, + "webcompat_score": None, + "whiteboard": "", }, ] -} +) + +SAMPLE_ALL_BUGS = {**SAMPLE_KB_BUGS} +SAMPLE_ALL_BUGS.update(SAMPLE_CORE_BUGS) +SAMPLE_ALL_BUGS.update(SAMPLE_BREAKAGE_BUGS) +SAMPLE_ALL_BUGS.update(SAMPLE_CORE_AS_KB_BUGS) +SAMPLE_ALL_BUGS.update(SAMPLE_ETP_BUGS) +SAMPLE_ALL_BUGS.update(SAMPLE_ETP_DEPENDENCIES_BUGS) + SAMPLE_HISTORY = to_history( [ @@ -517,7 +653,7 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]: "added": "webcompat:needs-diagnosis", }, ], - "when": datetime.fromisoformat("2023-05-01T17:41:18Z"), + "when": "2023-05-01T17:41:18Z", "who": "example", } ], @@ -536,7 +672,7 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]: {"removed": "--", "added": "S4", "field_name": "severity"}, ], "who": "example", - "when": datetime.fromisoformat("2023-03-18T16:58:27Z"), + "when": "2023-03-18T16:58:27Z", }, { "changes": [ @@ -551,7 +687,7 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]: "added": "example@example.com", }, ], - "when": datetime.fromisoformat("2023-06-01T10:00:00Z"), + "when": "2023-06-01T10:00:00Z", "who": "example", }, ], @@ -562,7 +698,7 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]: "history": [ { "changes": [], - "when": datetime.fromisoformat("2023-07-01T12:00:00Z"), + "when": "2023-07-01T12:00:00Z", "who": "example", } ], @@ -584,7 +720,7 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]: "added": "webcompat:platform-bug", }, ], - "when": datetime.fromisoformat("2023-05-01T14:00:00Z"), + "when": "2023-05-01T14:00:00Z", "who": "example", }, { @@ -595,7 +731,7 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]: "added": "RESOLVED", } ], - "when": datetime.fromisoformat("2023-08-01T14:00:00Z"), + "when": "2023-08-01T14:00:00Z", "who": "example", }, ], @@ -634,7 +770,7 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]: "history": [ { "who": "someone@example.com", - "when": datetime.fromisoformat("2024-05-13T16:03:18Z"), + "when": "2024-05-13T16:03:18Z", "changes": [ { "field_name": "cf_user_story", @@ -657,7 +793,7 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]: }, { "who": "someone@example.com", - "when": datetime.fromisoformat("2024-05-21T17:17:52Z"), + "when": "2024-05-21T17:17:52Z", "changes": [ { "removed": "", @@ -667,7 +803,7 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]: ], }, { - "when": datetime.fromisoformat("2024-05-21T17:22:20Z"), + "when": "2024-05-21T17:22:20Z", "changes": [ {"field_name": "depends_on", "added": "1886820", "removed": ""} ], @@ -686,7 +822,7 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]: "removed": "", }, ], - "when": datetime.fromisoformat("2024-05-27T15:07:33Z"), + "when": "2024-05-27T15:07:33Z", "who": "someone@example.com", }, { @@ -694,7 +830,7 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]: "changes": [ {"field_name": "depends_on", "added": "1876368", "removed": ""} ], - "when": datetime.fromisoformat("2024-06-05T19:25:37Z"), + "when": 
"2024-06-05T19:25:37Z", }, { "changes": [ @@ -704,7 +840,7 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]: "removed": "", } ], - "when": datetime.fromisoformat("2024-06-09T02:49:27Z"), + "when": "2024-06-09T02:49:27Z", "who": "someone@example.com", }, { @@ -716,7 +852,7 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]: "removed": "webcompat:needs-sitepatch", } ], - "when": datetime.fromisoformat("2024-06-11T16:34:22Z"), + "when": "2024-06-11T16:34:22Z", }, ], "alias": None, @@ -733,7 +869,7 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]: "removed": "webcompat:needs-diagnosis", } ], - "when": datetime.fromisoformat("2024-06-11T16:34:22Z"), + "when": "2024-06-11T16:34:22Z", }, ], "alias": None, @@ -742,103 +878,140 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]: ] ) -MISSING_KEYWORDS_BUGS = { - item["id"]: item - for item in [ +MISSING_KEYWORDS_INITIAL = to_history_entry( + [ + { + "number": 1898563, + "who": "name@example.com", + "change_time": datetime.fromisoformat("2024-05-23T16:40:29Z"), + "changes": [ + { + "added": "webcompat:needs-diagnosis, webcompat:needs-sitepatch", + "field_name": "keywords", + "removed": "", + } + ], + }, + { + "number": 222222, + "who": "name@example.com", + "change_time": datetime.fromisoformat("2024-05-13T13:02:11Z"), + "changes": [ + { + "added": "webcompat:needs-diagnosis", + "field_name": "keywords", + "removed": "", + } + ], + }, + ] +) + +MISSING_KEYWORDS_BUGS = to_bugs_by_id( + [ { + "assigned_to": "nobody@mozilla.org", + "blocks": [], + "component": "Site Reports", + "creation_time": "2024-05-23T16:40:29Z", "creator": "name@example.com", - "see_also": ["https://github.com/webcompat/web-bugs/issues/135636"], + "depends_on": [1886128], "id": 1898563, - "component": "Site Reports", "keywords": ["webcompat:needs-diagnosis", "webcompat:needs-sitepatch"], - "resolution": "", - "summary": "mylotto.co.nz - Website not supported on Firefox", + "last_change_time": "2024-05-27T15:07:03Z", + "last_resolved": None, + "priority": None, "product": "Web Compatibility", - "creator_detail": { - "real_name": "Sample", - "id": 111111, - "nick": "sample", - "email": "name@example.com", - "name": "name@example.com", - }, + "resolution": "", + "see_also": ["https://github.com/webcompat/web-bugs/issues/135636"], + "severity": None, "status": "NEW", - "depends_on": [1886128], - "creation_time": datetime.fromisoformat("2024-05-23T16:40:29Z"), + "summary": "mylotto.co.nz - Website not supported on Firefox", + "url": "", + "user_story": "", + "webcompat_priority": None, + "webcompat_score": None, + "whiteboard": "", }, { + "assigned_to": "nobody@mozilla.org", + "blocks": [], "component": "Site Reports", - "keywords": ["webcompat:sitepatch-applied"], - "see_also": ["https://github.com/webcompat/web-bugs/issues/136865"], - "id": 1896383, + "creation_time": "2024-05-13T13:02:11Z", "creator": "name@example.com", "depends_on": [1886820, 1876368], - "status": "NEW", + "id": 1896383, + "keywords": ["webcompat:sitepatch-applied"], + "last_change_time": "2024-06-11T16:34:22Z", + "last_resolved": None, + "priority": None, "product": "Web Compatibility", - "creator_detail": { - "name": "name@example.com", - "id": 111111, - "email": "name@example.com", - "nick": "sample", - "real_name": "Sample", - }, "resolution": "", + "see_also": ["https://github.com/webcompat/web-bugs/issues/136865"], + "severity": None, + "status": "NEW", "summary": "www.unimarc.cl - Buttons not 
working", - "creation_time": datetime.fromisoformat("2024-05-13T13:02:11Z"), + "url": "", + "user_story": "", + "webcompat_priority": None, + "webcompat_score": None, + "whiteboard": "", }, { - "id": 222222, - "product": "Web Compatibility", + "assigned_to": "nobody@mozilla.org", "blocks": [], - "status": "ASSIGNED", - "summary": "Test breakage bug", - "resolution": "", - "depends_on": [111111], - "see_also": [], "component": "Desktop", - "severity": "--", - "priority": "--", - "creator_detail": { - "name": "name@example.com", - "id": 111111, - "email": "name@example.com", - "nick": "sample", - "real_name": "Sample", - }, + "creation_time": "2024-05-13T13:02:11Z", "creator": "name@example.com", - "creation_time": datetime.fromisoformat("2024-05-13T13:02:11Z"), + "depends_on": [111111], + "id": 222222, "keywords": [], + "last_change_time": "2024-06-11T16:34:22Z", + "last_resolved": None, + "priority": None, + "product": "Web Compatibility", + "resolution": "", + "see_also": [], + "severity": None, + "status": "ASSIGNED", + "summary": "Test breakage bug", + "url": "", + "user_story": "", + "webcompat_priority": None, + "webcompat_score": None, + "whiteboard": "", }, ] -} +) -REMOVED_READDED_BUGS = { - item["id"]: item - for item in [ +REMOVED_READDED_BUGS = to_bugs_by_id( + [ { - "id": 333333, - "product": "Web Compatibility", + "assigned_to": "nobody@mozilla.org", "blocks": [], - "status": "ASSIGNED", - "summary": "Test breakage bug", - "resolution": "", - "depends_on": [111111], - "see_also": [], "component": "Desktop", - "severity": "--", - "priority": "--", - "creator_detail": { - "name": "name@example.com", - "id": 111111, - "email": "name@example.com", - "nick": "sample", - "real_name": "Sample", - }, + "creation_time": "2024-05-13T13:02:11Z", "creator": "name@example.com", - "creation_time": datetime.fromisoformat("2024-05-13T13:02:11Z"), + "depends_on": [111111], + "id": 333333, "keywords": ["webcompat:needs-diagnosis"], + "last_change_time": "2024-05-27T15:07:03Z", + "last_resolved": None, + "priority": None, + "product": "Web Compatibility", + "resolution": "", + "see_also": [], + "severity": None, + "status": "ASSIGNED", + "summary": "Test breakage bug", + "url": "", + "user_story": "", + "webcompat_priority": None, + "webcompat_score": None, + "whiteboard": "", } ] -} +) REMOVED_READDED_HISTORY = to_history( [ @@ -853,7 +1026,7 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]: "removed": "webcompat:needs-diagnosis", } ], - "when": datetime.fromisoformat("2024-06-11T16:34:22Z"), + "when": "2024-06-11T16:34:22Z", }, { "who": "someone@example.com", @@ -864,7 +1037,7 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]: "removed": "", } ], - "when": datetime.fromisoformat("2024-06-15T16:34:22Z"), + "when": "2024-06-15T16:34:22Z", }, { "who": "someone@example.com", @@ -875,7 +1048,7 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]: "removed": "", } ], - "when": datetime.fromisoformat("2024-07-11T16:34:22Z"), + "when": "2024-07-11T16:34:22Z", }, { "who": "someone@example.com", @@ -886,7 +1059,7 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]: "removed": "webcompat:needs-sitepatch", } ], - "when": datetime.fromisoformat("2024-07-14T16:34:22Z"), + "when": "2024-07-14T16:34:22Z", }, { "who": "someone@example.com", @@ -897,7 +1070,7 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]: "removed": "webcompat:needs-diagnosis", } ], - "when": 
datetime.fromisoformat("2024-09-11T16:34:22Z"), + "when": "2024-09-11T16:34:22Z", }, { "who": "someone@example.com", @@ -908,7 +1081,7 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]: "removed": "", } ], - "when": datetime.fromisoformat("2024-12-11T16:34:22Z"), + "when": "2024-12-11T16:34:22Z", }, ], "alias": None, @@ -930,18 +1103,18 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]: }, ], "who": "someone@example.com", - "when": datetime.fromisoformat("2018-05-02T18:25:47Z"), + "when": "2018-05-02T18:25:47Z", }, { "changes": [ {"added": "RESOLVED", "removed": "NEW", "field_name": "status"} ], - "when": datetime.fromisoformat("2024-05-16T10:58:15Z"), + "when": "2024-05-16T10:58:15Z", "who": "someone@example.com", }, { "who": "someone@example.com", - "when": datetime.fromisoformat("2024-06-03T14:44:48Z"), + "when": "2024-06-03T14:44:48Z", "changes": [ { "removed": "RESOLVED", @@ -956,7 +1129,7 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]: ], }, { - "when": datetime.fromisoformat("2016-01-14T14:01:36Z"), + "when": "2016-01-14T14:01:36Z", "who": "someone@example.com", "changes": [ { @@ -977,35 +1150,32 @@ def to_history_entry(data: list[dict[str, Any]]) -> list[BugHistoryEntry]: @pytest.fixture(scope="module") @patch("webcompat_kb.base.google.auth.default") @patch("webcompat_kb.base.bigquery.Client") -def bz(mock_bq, mock_auth_default): +def bq_client(mock_bq, mock_auth_default): mock_credentials = Mock() mock_project_id = "placeholder_id" mock_auth_default.return_value = (mock_credentials, mock_project_id) mock_bq.return_value = Mock() mock_bq.return_value = Mock() - client = get_client(mock_project_id) - return BugzillaToBigQuery( - client=client, - bq_dataset_id="placeholder_dataset", - bugzilla_api_key="placeholder_key", - write=False, - include_history=True, - recreate_history=False, - ) + get_client(mock_project_id) + + +@pytest.fixture(scope="module") +def history_updater(bq_client): + return BugHistoryUpdater(bq_client, "test", None) def test_extract_int_from_field(): field = extract_int_from_field("P3") assert field == 3 - field = extract_int_from_field("critical") + field = extract_int_from_field("critical", value_map={"critical": 1}) assert field == 1 field = extract_int_from_field("--") assert field is None - field = extract_int_from_field("N/A") + field = extract_int_from_field("N/A", value_map={"n/a": None}) assert field is None field = extract_int_from_field("") @@ -1015,279 +1185,170 @@ def test_extract_int_from_field(): assert field is None -def test_process_relations_with_no_bugs(bz): - result = bz.process_relations({}, RELATION_CONFIG) - expected = ({}, {"core": set(), "breakage": set()}) - assert result == expected - - -def test_process_relations(bz): - bugs, ids = bz.process_relations(SAMPLE_BUGS, RELATION_CONFIG) - expected_processed_bugs = { - 1835339: { - "core_bugs": [903746], - "breakage_reports": [], - "interventions": [ - "https://github.com/mozilla-extensions/webcompat-addon/blob/5b391018e847a1eb30eba4784c86acd1c638ed26/src/injections/js/bug1739489-draftjs-beforeinput.js" # noqa - ], - "other_browser_issues": [], - "standards_issues": [], - "standards_positions": [], - }, - 1835416: { - "core_bugs": [], - "breakage_reports": [], - "interventions": [], - "other_browser_issues": [], - "standards_issues": [], - "standards_positions": [ - "https://mozilla.github.io/standards-positions/#webusb" - ], - }, - 111111: { - "core_bugs": [555555], - "breakage_reports": [222222, 1734557], - 
"interventions": [], - "other_browser_issues": ["https://crbug.com/606208"], - "standards_issues": ["https://github.com/whatwg/html/issues/1896"], - "standards_positions": [ - "https://github.com/mozilla/standards-positions/issues/20", - "https://github.com/WebKit/standards-positions/issues/186", - ], - }, - } - - expected_bug_ids = { - "core": {903746, 555555}, - "breakage": {222222, 1734557}, - } +def test_group_bugs(): + site_reports, etp_reports, kb_bugs, platform_bugs = group_bugs(SAMPLE_ALL_BUGS) + assert site_reports == {222222, 444444, 1734557} + assert etp_reports == set(SAMPLE_ETP_BUGS.keys()) + assert kb_bugs == set(SAMPLE_KB_BUGS.keys()) | set( + SAMPLE_CORE_AS_KB_BUGS.keys() + ) | {444444, 999999, 1101005} + assert platform_bugs == set(SAMPLE_CORE_BUGS.keys()) | set( + SAMPLE_CORE_AS_KB_BUGS.keys() + ) | set(SAMPLE_ETP_DEPENDENCIES_BUGS.keys()) | {444444} - assert bugs == expected_processed_bugs - assert ids == expected_bug_ids +def test_get_kb_bug_site_report(): + site_reports, _, kb_bugs, _ = group_bugs(SAMPLE_ALL_BUGS) -def test_add_breakage_kb_entries(bz): - kb_bugs = { - bug_id: bug - for bug_id, bug in SAMPLE_BREAKAGE_BUGS.items() - if bug["product"] != "Web Compatibility" + kb_bugs_site_reports = get_kb_bug_site_report( + SAMPLE_ALL_BUGS, kb_bugs, site_reports + ) + assert kb_bugs_site_reports == { + 111111: {1734557, 222222}, + 444444: {444444}, + 1894244: {222222}, } - kb_data, kb_dep_ids = bz.process_relations(kb_bugs, RELATION_CONFIG) - assert set(kb_data.keys()) == set(kb_bugs.keys()) - assert kb_dep_ids["breakage"] == set() - - bz.add_kb_entry_breakage(kb_data, kb_dep_ids, SAMPLE_BREAKAGE_BUGS) - assert kb_data[444444]["breakage_reports"] == [444444] - assert kb_dep_ids["breakage"] == set(kb_bugs.keys()) - - -def test_relations(bz): - bugs, _ = bz.process_relations(SAMPLE_BUGS, RELATION_CONFIG) - relations = bz.build_relations(bugs, RELATION_CONFIG) - - assert relations["core_bugs"] == [ - {"knowledge_base_bug": 1835339, "core_bug": 903746}, - {"knowledge_base_bug": 111111, "core_bug": 555555}, - ] - - assert relations["breakage_reports"] == [ - {"knowledge_base_bug": 111111, "breakage_bug": 222222}, - {"knowledge_base_bug": 111111, "breakage_bug": 1734557}, - ] - assert relations["interventions"] == [ - { - "knowledge_base_bug": 1835339, - "code_url": "https://github.com/mozilla-extensions/webcompat-addon/blob/5b391018e847a1eb30eba4784c86acd1c638ed26/src/injections/js/bug1739489-draftjs-beforeinput.js", # noqa - } - ] - assert relations["other_browser_issues"] == [ - {"knowledge_base_bug": 111111, "issue_url": "https://crbug.com/606208"} - ] - assert relations["standards_issues"] == [ - { - "knowledge_base_bug": 111111, - "issue_url": "https://github.com/whatwg/html/issues/1896", - } - ] - assert relations["standards_positions"] == [ - { - "knowledge_base_bug": 1835416, - "discussion_url": "https://mozilla.github.io/standards-positions/#webusb", # noqa - }, - { - "knowledge_base_bug": 111111, - "discussion_url": "https://github.com/mozilla/standards-positions/issues/20", # noqa - }, - { - "knowledge_base_bug": 111111, - "discussion_url": "https://github.com/WebKit/standards-positions/issues/186", # noqa - }, - ] +def test_get_kb_bug_core_bug(): + _, _, kb_bugs, platform_bugs = group_bugs(SAMPLE_ALL_BUGS) -def test_add_links(bz): - bugs, _ = bz.process_relations(SAMPLE_BUGS, RELATION_CONFIG) - core_bugs, _ = bz.process_relations( - SAMPLE_CORE_BUGS, {key: RELATION_CONFIG[key] for key in LINK_FIELDS} - ) - - result = bz.add_links(bugs, core_bugs) + 
kb_bugs_core_bugs = get_kb_bug_core_bugs(SAMPLE_ALL_BUGS, kb_bugs, platform_bugs) + assert kb_bugs_core_bugs == {111111: {555555}, 1835339: {903746}} - assert result[1835339]["standards_issues"] == [ - "https://github.com/w3c/uievents/issues/353" - ] - assert result[111111]["standards_positions"] == [ - "https://github.com/mozilla/standards-positions/issues/20", - "https://github.com/WebKit/standards-positions/issues/186", - "https://mozilla.github.io/standards-positions/#testposition", - ] +def test_get_etp_breakage_reports(): + _, etp_bugs, _, _ = group_bugs(SAMPLE_ALL_BUGS) -def test_add_links_no_core(bz): - bugs, _ = bz.process_relations(SAMPLE_BUGS, RELATION_CONFIG) - core_bugs, _ = bz.process_relations(SAMPLE_CORE_BUGS, RELATION_CONFIG) + etp_links = get_etp_breakage_reports(SAMPLE_ALL_BUGS, etp_bugs) - result = bz.add_links(bugs, {}) + assert etp_links == { + 1910548: {1101005, 1875061}, + 1921943: {1101005, 1797458}, + 1928102: {1101005}, + } - assert result[1835339]["standards_issues"] == [] - assert result[111111]["standards_positions"] == [ - "https://github.com/mozilla/standards-positions/issues/20", - "https://github.com/WebKit/standards-positions/issues/186", - ] +def test_get_external_links(): + _, _, kb_bugs, _ = group_bugs(SAMPLE_ALL_BUGS) -def test_get_bugs_updated_since_last_import(bz): - all_bugs = { - item["id"]: item - for item in [ - { - "id": 1, - "last_change_time": datetime.fromisoformat("2023-04-01T10:00:00Z"), - }, - { - "id": 2, - "last_change_time": datetime.fromisoformat("2023-04-02T11:30:00Z"), - }, - { - "id": 3, - "last_change_time": datetime.fromisoformat("2023-04-03T09:45:00Z"), - }, - ] + assert EXTERNAL_LINK_CONFIGS["interventions"].get_links( + SAMPLE_ALL_BUGS, kb_bugs + ) == { + 1835339: { + "https://github.com/mozilla-extensions/webcompat-addon/blob/5b391018e847a1eb30eba4784c86acd1c638ed26/src/injections/js/bug1739489-draftjs-beforeinput.js" + }, } - last_import_time = datetime(2023, 4, 2, 10, 0, tzinfo=timezone.utc) - expected_result = {2, 3} - result = bz.get_bugs_updated_since_last_import(all_bugs, last_import_time) - assert result == expected_result - + assert EXTERNAL_LINK_CONFIGS["other_browser_issues"].get_links( + SAMPLE_ALL_BUGS, kb_bugs + ) == {111111: {"https://crbug.com/606208"}} -def test_filter_bug_history_changes(bz): - expected_result = to_history_entry( - [ - { - "number": 1536482, - "who": "example", - "change_time": datetime.fromisoformat("2023-05-01T17:41:18Z"), - "changes": [ - { - "field_name": "keywords", - "removed": "", - "added": "webcompat:needs-diagnosis", - } - ], - }, - { - "number": 1536483, - "who": "example", - "change_time": datetime.fromisoformat("2023-03-18T16:58:27Z"), - "changes": [ - { - "field_name": "cf_user_story", - "added": "@@ -0,0 +1,3 @@\n+platform:linux\r\n+impact:feature-broken\r\n+affects:some\n\\ No newline at end of file\n", # noqa - "removed": "", - } - ], - }, - { - "number": 1536483, - "who": "example", - "change_time": datetime.fromisoformat("2023-06-01T10:00:00Z"), - "changes": [ - { - "field_name": "status", - "added": "ASSIGNED", - "removed": "UNCONFIRMED", - } - ], - }, - { - "number": 1536485, - "who": "example", - "change_time": datetime.fromisoformat("2023-05-01T14:00:00Z"), - "changes": [ - { - "removed": "", - "field_name": "keywords", - "added": "webcompat:platform-bug", - } - ], - }, - { - "number": 1536485, - "who": "example", - "change_time": datetime.fromisoformat("2023-08-01T14:00:00Z"), - "changes": [ - {"removed": "ASSIGNED", "field_name": "status", "added": "RESOLVED"} - ], - 
}, - ] - ) + assert EXTERNAL_LINK_CONFIGS["standards_issues"].get_links( + SAMPLE_ALL_BUGS, kb_bugs + ) == { + 111111: {"https://github.com/whatwg/html/issues/1896"}, + 1835339: {"https://github.com/w3c/uievents/issues/353"}, + } - result, bug_ids = bz.extract_history_fields(SAMPLE_HISTORY) - assert result == expected_result - assert bug_ids == {1536482, 1536483, 1536485} + assert EXTERNAL_LINK_CONFIGS["standards_positions"].get_links( + SAMPLE_ALL_BUGS, kb_bugs + ) == { + 111111: { + "https://github.com/mozilla/standards-positions/issues/20", + "https://github.com/WebKit/standards-positions/issues/186", + "https://mozilla.github.io/standards-positions/#testposition", + }, + 1835416: {"https://mozilla.github.io/standards-positions/#webusb"}, + } -def test_create_synthetic_history(bz): - history, bug_ids = bz.extract_history_fields(MISSING_KEYWORDS_HISTORY) - result = bz.create_synthetic_history(MISSING_KEYWORDS_BUGS, history) +def test_bugzilla_to_history_entry(history_updater): + expected_result = {bug_id: [] for bug_id in SAMPLE_HISTORY} - expected = to_history_entry( - [ - { - "number": 1898563, - "who": "name@example.com", - "change_time": datetime.fromisoformat("2024-05-23T16:40:29Z"), - "changes": [ - { - "added": "webcompat:needs-diagnosis, webcompat:needs-sitepatch", - "field_name": "keywords", - "removed": "", - } - ], - }, - { - "number": 222222, - "who": "name@example.com", - "change_time": datetime.fromisoformat("2024-05-13T13:02:11Z"), - "changes": [ - { - "added": "webcompat:needs-diagnosis", - "field_name": "keywords", - "removed": "", - } - ], - }, - ] + expected_result.update( + to_history_entry( + [ + { + "number": 1536482, + "who": "example", + "change_time": datetime.fromisoformat("2023-05-01T17:41:18Z"), + "changes": [ + { + "field_name": "keywords", + "removed": "", + "added": "webcompat:needs-diagnosis", + } + ], + }, + { + "number": 1536483, + "who": "example", + "change_time": datetime.fromisoformat("2023-03-18T16:58:27Z"), + "changes": [ + { + "field_name": "cf_user_story", + "added": "@@ -0,0 +1,3 @@\n+platform:linux\r\n+impact:feature-broken\r\n+affects:some\n\\ No newline at end of file\n", # noqa + "removed": "", + } + ], + }, + { + "number": 1536483, + "who": "example", + "change_time": datetime.fromisoformat("2023-06-01T10:00:00Z"), + "changes": [ + { + "field_name": "status", + "added": "ASSIGNED", + "removed": "UNCONFIRMED", + } + ], + }, + { + "number": 1536485, + "who": "example", + "change_time": datetime.fromisoformat("2023-05-01T14:00:00Z"), + "changes": [ + { + "removed": "", + "field_name": "keywords", + "added": "webcompat:platform-bug", + } + ], + }, + { + "number": 1536485, + "who": "example", + "change_time": datetime.fromisoformat("2023-08-01T14:00:00Z"), + "changes": [ + { + "removed": "ASSIGNED", + "field_name": "status", + "added": "RESOLVED", + } + ], + }, + ] + ) ) - assert result == expected + entries = history_updater.bugzilla_to_history_entry(SAMPLE_HISTORY) + assert entries == expected_result + + +def test_create_initial_history(history_updater): + history = history_updater.bugzilla_to_history_entry(MISSING_KEYWORDS_HISTORY) + result = history_updater.create_initial_history_entry( + MISSING_KEYWORDS_BUGS, history + ) + assert result == MISSING_KEYWORDS_INITIAL -def test_create_synthetic_history_removed_readded(bz): - history, bug_ids = bz.extract_history_fields(REMOVED_READDED_HISTORY) - result = bz.create_synthetic_history(REMOVED_READDED_BUGS, history) + +def test_create_initial_history_removed_readded(history_updater): + history = 
history_updater.bugzilla_to_history_entry(REMOVED_READDED_HISTORY) + result = history_updater.create_initial_history_entry(REMOVED_READDED_BUGS, history) expected = to_history_entry( [ @@ -1309,7 +1370,22 @@ def test_create_synthetic_history_removed_readded(bz): assert result == expected -def test_is_removed_earliest(bz): +@patch("webcompat_kb.bugzilla.BugHistoryUpdater.bugzilla_fetch_history") +def test_create_new_bugs_history(mock_bugzilla_fetch_history, history_updater): + mock_bugzilla_fetch_history.return_value = ( + history_updater.bugzilla_to_history_entry(MISSING_KEYWORDS_HISTORY) + ) + + expected = history_updater.bugzilla_to_history_entry(MISSING_KEYWORDS_HISTORY) + for bug_id, update in MISSING_KEYWORDS_INITIAL.items(): + expected[bug_id].extend(update) + + result = history_updater.new_bugs_history(MISSING_KEYWORDS_BUGS) + + assert result == expected + + +def test_missing_initial_add(): keyword_map = { "added": { "webcompat:needs-sitepatch": [ @@ -1331,71 +1407,35 @@ def test_is_removed_earliest(bz): }, } - is_removed_first_diagnosis = bz.is_removed_earliest( - keyword_map["added"]["webcompat:needs-diagnosis"], - keyword_map["removed"]["webcompat:needs-diagnosis"], - ) - - is_removed_first_sitepatch = bz.is_removed_earliest( - keyword_map["added"]["webcompat:needs-sitepatch"], - keyword_map["removed"]["webcompat:needs-sitepatch"], - ) - - is_removed_first_empty_added = bz.is_removed_earliest( - [], - [datetime(2024, 7, 14, 16, 34, 22, tzinfo=timezone.utc)], - ) - - is_removed_first_empty_removed = bz.is_removed_earliest( - [datetime(2024, 7, 14, 16, 34, 22, tzinfo=timezone.utc)], - [], - ) - - is_removed_first_empty = bz.is_removed_earliest( - [], - [], - ) - - assert is_removed_first_diagnosis - assert not is_removed_first_sitepatch - assert is_removed_first_empty_added - assert not is_removed_first_empty_removed - assert not is_removed_first_empty - - -@patch("webcompat_kb.bugzilla.BugzillaToBigQuery.get_existing_history_records_by_ids") -def test_filter_only_unsaved_changes(mock_get_existing, bz): - mock_get_existing.return_value = to_history_entry( - [ - { - "number": 1896383, - "who": "someone@example.com", - "change_time": datetime(2024, 5, 27, 15, 7, 33, tzinfo=timezone.utc), - "changes": [ - { - "field_name": "keywords", - "added": "webcompat:needs-sitepatch", - "removed": "webcompat:needs-diagnosis", - } - ], - }, - { - "number": 1896383, - "who": "someone@example.com", - "change_time": datetime(2024, 6, 11, 16, 34, 22, tzinfo=timezone.utc), - "changes": [ - { - "field_name": "keywords", - "added": "webcompat:sitepatch-applied", - "removed": "webcompat:needs-sitepatch", - } - ], - }, - ] + property_histories = defaultdict(PropertyHistory) + for action, items in keyword_map.items(): + for keyword, change_times in items.items(): + for change_time in change_times: + property_histories[keyword].add(change_time, action) + + assert property_histories["webcompat:needs-diagnosis"].missing_initial_add() + assert not property_histories["webcompat:needs-sitepatch"].missing_initial_add() + removed_first = PropertyHistory() + removed_first.add(datetime(2024, 7, 14, 16, 34, 22, tzinfo=timezone.utc), "removed") + assert removed_first.missing_initial_add() + added_first = PropertyHistory() + added_first.add(datetime(2024, 7, 14, 16, 34, 22, tzinfo=timezone.utc), "added") + assert not added_first.missing_initial_add() + empty_history = PropertyHistory() + assert empty_history.missing_initial_add() + + +@patch("webcompat_kb.bugzilla.BugHistoryUpdater.bigquery_last_import") 
+@patch("webcompat_kb.bugzilla.BugHistoryUpdater.bugzilla_fetch_history") +def test_existing_bugs_history( + mock_bugzilla_fetch_history, mock_last_import, history_updater +): + mock_last_import.return_value = datetime(2020, 1, 1, tzinfo=timezone.utc) + mock_bugzilla_fetch_history.return_value = ( + history_updater.bugzilla_to_history_entry(MISSING_KEYWORDS_HISTORY) ) - history, bug_ids = bz.extract_history_fields(MISSING_KEYWORDS_HISTORY) - result = bz.filter_only_unsaved_changes(history, bug_ids) + result = history_updater.existing_bugs_history(MISSING_KEYWORDS_BUGS) expected = to_history_entry( [ @@ -1428,123 +1468,6 @@ def test_filter_only_unsaved_changes(mock_get_existing, bz): }, ], }, - { - "number": 222222, - "who": "someone@example.com", - "change_time": datetime.fromisoformat("2024-06-11T16:34:22Z"), - "changes": [ - { - "field_name": "keywords", - "added": "", - "removed": "webcompat:needs-diagnosis", - } - ], - }, - ] - ) - - result.sort(key=lambda item: item.number) - expected.sort(key=lambda item: item.number) - - assert result == expected - - -@patch("webcompat_kb.bugzilla.BugzillaToBigQuery.get_existing_history_records_by_ids") -def test_filter_only_unsaved_changes_multiple_changes(mock_get_existing, bz): - mock_get_existing.return_value = to_history_entry( - [ - { - "number": 1239595, - "who": "someone@example.com", - "change_time": datetime(2018, 5, 2, 18, 25, 47, tzinfo=timezone.utc), - "changes": [ - { - "field_name": "keywords", - "added": "parity-chrome, parity-edge, parity-ie", - "removed": "", - } - ], - }, - { - "number": 1239595, - "who": "someone@example.com", - "change_time": datetime(2016, 1, 14, 14, 1, 36, tzinfo=timezone.utc), - "changes": [ - {"field_name": "status", "added": "NEW", "removed": "UNCONFIRMED"} - ], - }, - { - "number": 1239595, - "who": "someone@example.com", - "change_time": datetime(2024, 5, 16, 10, 58, 15, tzinfo=timezone.utc), - "changes": [ - {"field_name": "status", "added": "RESOLVED", "removed": "NEW"} - ], - }, - ] - ) - - history, bug_ids = bz.extract_history_fields(KEYWORDS_AND_STATUS) - result = bz.filter_only_unsaved_changes(history, bug_ids) - changes = result[0].changes - - expected_changes = [ - BugHistoryChange(**item) - for item in [ - { - "field_name": "keywords", - "added": "webcompat:platform-bug", - "removed": "", - }, - {"field_name": "status", "added": "REOPENED", "removed": "RESOLVED"}, - ] - ] - - changes.sort(key=lambda item: item.field_name) - expected_changes.sort(key=lambda item: item.field_name) - - assert len(result) == 1 - assert changes == expected_changes - - -@patch("webcompat_kb.bugzilla.BugzillaToBigQuery.get_existing_history_records_by_ids") -def test_filter_only_unsaved_changes_empty(mock_get_existing, bz): - mock_get_existing.return_value = [] - - history, bug_ids = bz.extract_history_fields(MISSING_KEYWORDS_HISTORY) - result = bz.filter_only_unsaved_changes(history, bug_ids) - - expected = to_history_entry( - [ - { - "number": 1898563, - "who": "name@example.com", - "change_time": datetime.fromisoformat("2024-05-27T15:10:10Z"), - "changes": [ - { - "added": "@@ -1 +1,4 @@\n-\n+platform:windows,mac,linux,android\r\n+impact:blocked\r\n+configuration:general\r\n+affects:all\n", - "field_name": "cf_user_story", - "removed": "", - } - ], - }, - { - "number": 1896383, - "who": "someone@example.com", - "change_time": datetime.fromisoformat("2024-05-13T16:03:18Z"), - "changes": [ - { - "field_name": "cf_user_story", - "added": "@@ -1 +1,4 
@@\n-\n+platform:windows,mac,linux\r\n+impact:site-broken\r\n+configuration:general\r\n+affects:all\n", - "removed": "", - }, - { - "removed": "", - "field_name": "keywords", - "added": "webcompat:needs-diagnosis", - }, - ], - }, { "number": 1896383, "who": "someone@example.com", @@ -1554,7 +1477,7 @@ def test_filter_only_unsaved_changes_empty(mock_get_existing, bz): "removed": "webcompat:needs-diagnosis", "field_name": "keywords", "added": "webcompat:needs-sitepatch", - } + }, ], }, { @@ -1587,338 +1510,175 @@ def test_filter_only_unsaved_changes_empty(mock_get_existing, bz): assert result == expected -@patch("webcompat_kb.bugzilla.BugzillaToBigQuery.get_existing_history_records_by_ids") -def test_filter_only_unsaved_changes_synthetic(mock_get_existing, bz): - history, bug_ids = bz.extract_history_fields(MISSING_KEYWORDS_HISTORY) - s_history = bz.create_synthetic_history(MISSING_KEYWORDS_BUGS, history) +@patch("webcompat_kb.bugzilla.BugHistoryUpdater.bigquery_last_import") +@patch("webcompat_kb.bugzilla.BugHistoryUpdater.bugzilla_fetch_history") +def test_existing_bugs_history_filter_updated( + mock_bugzilla_fetch_history, mock_last_import, history_updater +): + mock_last_import.return_value = datetime(2024, 5, 28, tzinfo=timezone.utc) + mock_bugzilla_fetch_history.return_value = ( + history_updater.bugzilla_to_history_entry(MISSING_KEYWORDS_HISTORY) + ) + + result = history_updater.existing_bugs_history(MISSING_KEYWORDS_BUGS) - mock_get_existing.return_value = to_history_entry( + expected = to_history_entry( [ { - "number": 1898563, - "who": "name@example.com", - "change_time": datetime(2024, 5, 23, 16, 40, 29, tzinfo=timezone.utc), + "number": 1896383, + "who": "someone@example.com", + "change_time": datetime.fromisoformat("2024-06-11T16:34:22Z"), "changes": [ { "field_name": "keywords", - "added": "webcompat:needs-diagnosis, webcompat:needs-sitepatch", # noqa - "removed": "", + "added": "webcompat:sitepatch-applied", + "removed": "webcompat:needs-sitepatch", } ], }, - ] - ) - - result = bz.filter_only_unsaved_changes(s_history, bug_ids) - - expected = to_history_entry( - [ { "number": 222222, - "who": "name@example.com", - "change_time": datetime.fromisoformat("2024-05-13T13:02:11Z"), + "who": "someone@example.com", + "change_time": datetime.fromisoformat("2024-06-11T16:34:22Z"), "changes": [ { - "added": "webcompat:needs-diagnosis", "field_name": "keywords", - "removed": "", + "added": "", + "removed": "webcompat:needs-diagnosis", } ], - } + }, ] ) assert result == expected -def test_empty_input(): - assert parse_string_to_json("") == "" - - -def test_null_input(): - assert parse_string_to_json(None) == "" - - -def test_single_key_value_pair(): - input_str = "key:value" - expected = {"key": "value"} - assert parse_string_to_json(input_str) == expected - - -def test_multiple_key_value_pairs(): - input_str = "key1:value1\nkey2:value2" - expected = {"key1": "value1", "key2": "value2"} - assert parse_string_to_json(input_str) == expected - - -def test_multiple_values_for_same_key(): - input_str = "key:value1\r\nkey:value2" - expected = {"key": ["value1", "value2"]} - assert parse_string_to_json(input_str) == expected - - -def test_mixed_line_breaks(): - input_str = "key1:value1\r\nkey2:value2\nkey3:value3" - expected = {"key1": "value1", "key2": "value2", "key3": "value3"} - assert parse_string_to_json(input_str) == expected - - -def test_empty_result(): - input_str = "\n\n" - assert parse_string_to_json(input_str) == "" +def test_missing_records(history_updater): + initial_history = 
history_updater.bugzilla_to_history_entry( + MISSING_KEYWORDS_HISTORY + ) + new_history = {key: value[:] for key, value in initial_history.items()} + for bug_id, update in MISSING_KEYWORDS_INITIAL.items(): + new_history[bug_id].extend(update) + expected = MISSING_KEYWORDS_INITIAL -def test_severity_string(): - input_str = "platform:linux\r\nimpact:feature-broken\r\naffects:some" - expected = { - "platform": "linux", - "impact": "feature-broken", - "affects": "some", - } - assert parse_string_to_json(input_str) == expected - - -def test_values_with_colon(): - input_str = "url:http://chatgpt-tokenizer.com/*\r\nurl:excalidraw.com/*\r\nurl:godbolt.org/*\r\nurl:youwouldntsteala.website/*\r\nurl:yandex.ru/images/*" # noqa - expected = { - "url": [ - "http://chatgpt-tokenizer.com/*", - "excalidraw.com/*", - "godbolt.org/*", - "youwouldntsteala.website/*", - "yandex.ru/images/*", - ] - } - assert parse_string_to_json(input_str) == expected + result = history_updater.missing_records(initial_history, new_history) + assert result == expected -def test_kb_bugs_from_platform_bugs(bz): - core_as_kb_bugs = bz.kb_bugs_from_platform_bugs( - SAMPLE_CORE_AS_KB_BUGS, {1835339}, {1896383, 222222} - ) - assert core_as_kb_bugs == { - item["id"]: item - for item in [ +@pytest.mark.parametrize( + "input,expected", + [ + ("", {}), + (None, {}), + ("key:value", {"key": "value"}), + ("key1:value1\nkey2:value2", {"key1": "value1", "key2": "value2"}), + ("key:value1\r\nkey:value2", {"key": ["value1", "value2"]}), + ( + "key1:value1\r\nkey2:value2\nkey3:value3", + {"key1": "value1", "key2": "value2", "key3": "value3"}, + ), + ("\n\n", {}), + ( + "platform:linux\r\nimpact:feature-broken\r\naffects:some", { - "assigned_to": "nobody@mozilla.org", - "whiteboard": "", - "see_also": ["https://bugzilla.mozilla.org/show_bug.cgi?id=1740472"], - "severity": "S3", - "product": "Core", - "depends_on": [], - "summary": "Consider adding support for Error.captureStackTrace", - "resolution": "", - "last_change_time": datetime.fromisoformat("2024-05-27T15:07:03Z"), - "keywords": [ - "parity-chrome", - "parity-safari", - "webcompat:platform-bug", - ], - "priority": "P3", - "creation_time": datetime.fromisoformat("2024-03-21T16:40:27Z"), - "cf_user_story": "", - "status": "NEW", - "blocks": [1896383], - "url": "", - "cf_last_resolved": None, - "component": "JavaScript Engine", - "id": 1886820, + "platform": "linux", + "impact": "feature-broken", + "affects": "some", }, + ), + ( + "url:http://chatgpt-tokenizer.com/*\r\nurl:excalidraw.com/*\r\nurl:godbolt.org/*\r\nurl:youwouldntsteala.website/*\r\nurl:yandex.ru/images/*", { - "assigned_to": "nobody@mozilla.org", - "whiteboard": "", - "see_also": [], - "severity": "S3", - "product": "Core", - "depends_on": [], - "summary": "Example core site report and platform bug", - "resolution": "", - "last_change_time": datetime.fromisoformat("2024-05-27T15:07:03Z"), - "keywords": ["webcompat:platform-bug", "webcompat:site-report"], - "priority": "P3", - "creation_time": datetime.fromisoformat("2024-03-21T16:40:27Z"), - "cf_user_story": "", - "status": "NEW", - "blocks": [], - "url": "", - "cf_last_resolved": None, - "component": "JavaScript Engine", - "id": 444444, - }, - ] - } - - -def test_convert_bug_data(bz): - expected_data = [ - { - "assigned_to": "test@example.org", - "component": "Knowledge Base", - "creation_time": "2000-07-25T13:50:04+00:00", - "creator": "nobody@mozilla.org", - "keywords": [], - "number": 1835339, - "priority": None, - "product": "Web Compatibility", - "resolution": "", - 
"resolved_time": None, - "severity": None, - "status": "NEW", - "title": "Missing implementation of textinput event", - "url": "", - "user_story": { - "url": [ - "cmcreg.bancosantander.es/*", - "new.reddit.com/*", - "web.whatsapp.com/*", - "facebook.com/*", - "twitter.com/*", - "reddit.com/*", - "mobilevikings.be/*", - "book.ersthelfer.tv/*", - ], - }, - "user_story_raw": "url:cmcreg.bancosantander.es/*\r\nurl:new.reddit.com/*\r\nurl:web.whatsapp.com/*\r\nurl:facebook.com/*\r\nurl:twitter.com/*\r\nurl:reddit.com/*\r\nurl:mobilevikings.be/*\r\nurl:book.ersthelfer.tv/*", - "whiteboard": "", - "webcompat_priority": None, - "webcompat_score": None, - }, - { - "assigned_to": None, - "component": "Knowledge Base", - "creation_time": "2000-07-25T13:50:04+00:00", - "creator": "nobody@mozilla.org", - "keywords": [], - "number": 1835416, - "priority": None, - "product": "Web Compatibility", - "resolution": "", - "resolved_time": None, - "severity": None, - "status": "NEW", - "title": "Sites breaking due to the lack of WebUSB support", - "url": "", - "user_story": { "url": [ - "webminidisc.com/*", - "app.webadb.com/*", - "www.numworks.com/*", - "webadb.github.io/*", - "www.stemplayer.com/*", - "wootility.io/*", - "python.microbit.org/*", - "flash.android.com/*", - ], + "http://chatgpt-tokenizer.com/*", + "excalidraw.com/*", + "godbolt.org/*", + "youwouldntsteala.website/*", + "yandex.ru/images/*", + ] }, - "user_story_raw": "url:webminidisc.com/*\r\nurl:app.webadb.com/*\r\nurl:www.numworks.com/*\r\nurl:webadb.github.io/*\r\nurl:www.stemplayer.com/*\r\nurl:wootility.io/*\r\nurl:python.microbit.org/*\r\nurl:flash.android.com/*", - "whiteboard": "", - "webcompat_priority": None, - "webcompat_score": None, - }, - { - "assigned_to": None, - "component": "Knowledge Base", - "creation_time": "2000-07-25T13:50:04+00:00", - "creator": "nobody@mozilla.org", - "keywords": [], - "number": 111111, - "priority": None, - "product": "Web Compatibility", - "resolution": "", - "resolved_time": None, - "severity": None, - "status": "NEW", - "title": "Test bug", - "url": "", - "user_story": "", - "user_story_raw": "", - "whiteboard": "", - "webcompat_priority": None, - "webcompat_score": None, - }, - ] - for bug, expected in zip(SAMPLE_BUGS.values(), expected_data): - assert bz.convert_bug_data(bug) == expected - + ), + ], +) +def test_parse_user_story(input, expected): + assert parse_user_story(input) == expected -def test_unify_etp_dependencies(bz): - unified_etp_bugs = bz.unify_etp_dependencies( - SAMPLE_ETP_BUGS, SAMPLE_ETP_DEPENDENCIES_BUGS - ) - assert unified_etp_bugs == { - item["id"]: item - for item in [ - { - "url": "https://gothamist.com/", - "summary": "gothamist.com - The comments are not displayed with ETP set to Strict", - "id": 1910548, - "keywords": ["priv-webcompat", "webcompat:site-report"], - "component": "Privacy: Site Reports", - "resolution": "", - "blocks": [], - "depends_on": [1101005, 1875061], - "creation_time": datetime.fromisoformat("2024-07-30T07:37:28Z"), - "see_also": ["https://github.com/webcompat/web-bugs/issues/139647"], - "product": "Web Compatibility", - "status": "NEW", - "cf_webcompat_priority": "---", - "cf_webcompat_score": "---", - }, - { - "see_also": ["https://github.com/webcompat/web-bugs/issues/142250"], - "id": 1921943, - "summary": "my.farys.be - Login option is missing with ETP set to STRICT", - "product": "Web Compatibility", - "keywords": [ - "priv-webcompat", - "webcompat:platform-bug", - "webcompat:site-report", - ], - "status": "NEW", - "resolution": "", - 
"component": "Privacy: Site Reports", - "blocks": [], - "depends_on": [1101005, 1797458], - "creation_time": datetime.fromisoformat("2024-10-01T08:50:58Z"), - "url": "https://my.farys.be/myfarys/", - "cf_webcompat_priority": "---", - "cf_webcompat_score": "---", - }, +@pytest.mark.parametrize( + "input_data, test_fields", + [ + ( + {}, { - "see_also": [], - "summary": "ryanair.com - The form to start a chat does not load with ETP set to STRICT", - "id": 1928102, - "product": "Web Compatibility", - "status": "NEW", - "keywords": ["webcompat:site-report"], - "blocks": [], - "component": "Privacy: Site Reports", - "resolution": "", - "depends_on": [1101005], - "url": "https://www.ryanair.com/gb/en/lp/chat", - "creation_time": datetime.fromisoformat("2024-10-30T15:04:41Z"), - "cf_webcompat_priority": "---", - "cf_webcompat_score": "---", + "severity": None, + "priority": None, + "creation_time": datetime.fromisoformat("2024-03-21T16:40:27Z"), + "assigned_to": None, + "last_change_time": datetime.fromisoformat("2024-03-22T16:40:27Z"), + "webcompat_priority": None, + "webcompat_score": None, }, - ] + ), + ({"assigned_to": "test@example.org"}, {"assigned_to": "test@example.org"}), + ({"priority": "P1"}, {"priority": 1}), + ({"severity": "blocker"}, {"severity": 1}), + ({"severity": "minor"}, {"severity": 4}), + ({"severity": "S1"}, {"severity": 1}), + ({"severity": "N/A"}, {"severity": None}), + ({"cf_webcompat_priority": "P1"}, {"webcompat_priority": "P1"}), + ({"cf_webcompat_priority": "?"}, {"webcompat_priority": "?"}), + ({"cf_webcompat_score": "10"}, {"webcompat_score": 10}), + ], +) +def test_from_bugzilla(input_data, test_fields): + bug_data = { + "id": 1, + "summary": "Example", + "status": "NEW", + "resolution": "", + "product": "Web Compatibility", + "component": "Test", + "see_also": [], + "depends_on": [], + "blocks": [], + "priority": "--", + "severity": "--", + "creation_time": "2024-03-21T16:40:27Z", + "assigned_to": "nobody@mozilla.org", + "keywords": [], + "url": "https://example.test", + "cf_user_story": "", + "last_resolved": None, + "last_change_time": "2024-03-22T16:40:27Z", + "whiteboard": "", + "creator": "nobody@mozilla.org", + "cf_webcompat_priority": "---", + "cf_webcompat_score": "---", } + bug_data.update(input_data) + bugzilla_bug = bugdantic.bugzilla.Bug.model_validate(bug_data) + bug = Bug.from_bugzilla(bugzilla_bug) + + for attr, expected in test_fields.items(): + assert getattr(bug, attr) == expected + + +def test_read_write_data(): + site_reports, etp_reports, kb_bugs, platform_bugs = group_bugs(SAMPLE_ALL_BUGS) + with tempfile.NamedTemporaryFile("w") as f: + write_bugs( + f.name, + SAMPLE_ALL_BUGS, + site_reports, + etp_reports, + kb_bugs, + platform_bugs, + [], + [], + ) - -def test_build_etp_relations(bz): - unified_etp_bugs = bz.unify_etp_dependencies( - SAMPLE_ETP_BUGS, SAMPLE_ETP_DEPENDENCIES_BUGS - ) - etp_data, _ = bz.process_relations(unified_etp_bugs, ETP_RELATION_CONFIG) - etp_rels = bz.build_relations(etp_data, ETP_RELATION_CONFIG) - - assert etp_rels == { - "etp_breakage_reports": [ - {"breakage_bug": 1910548, "etp_meta_bug": 1101005}, - {"breakage_bug": 1910548, "etp_meta_bug": 1875061}, - {"breakage_bug": 1921943, "etp_meta_bug": 1101005}, - {"breakage_bug": 1921943, "etp_meta_bug": 1797458}, - {"breakage_bug": 1928102, "etp_meta_bug": 1101005}, - ] - } + assert load_bugs(None, f.name) == SAMPLE_ALL_BUGS diff --git a/jobs/webcompat-kb/webcompat_kb/bugzilla.py b/jobs/webcompat-kb/webcompat_kb/bugzilla.py index 0d19472d..9f3d69ab 100644 --- 
a/jobs/webcompat-kb/webcompat_kb/bugzilla.py +++ b/jobs/webcompat-kb/webcompat_kb/bugzilla.py @@ -1,4 +1,6 @@ import argparse +import enum +import json import logging import os import re @@ -7,14 +9,13 @@ Any, Iterable, Iterator, - Mapping, MutableMapping, - NamedTuple, Optional, - Sequence, - Union, + Self, cast, ) +from collections import defaultdict +from collections.abc import Sequence, Mapping from dataclasses import dataclass from datetime import datetime, timedelta @@ -22,22 +23,170 @@ from google.cloud import bigquery from .base import EtlJob +from .bqhelpers import ensure_table -Bug = Mapping[str, Any] -BugsById = Mapping[int, Bug] -MutBugsById = MutableMapping[int, Bug] -Relations = Mapping[str, list[Mapping[str, Any]]] -RelationConfig = Mapping[str, Mapping[str, Any]] +class BugLoadError(Exception): + pass -@dataclass + +@dataclass(frozen=True) +class Bug: + id: int + summary: str + status: str + resolution: str + product: str + component: str + creator: str + see_also: list[str] + depends_on: list[int] + blocks: list[int] + priority: Optional[int] + severity: Optional[int] + creation_time: datetime + assigned_to: Optional[str] + keywords: list[str] + url: str + user_story: str + last_resolved: Optional[datetime] + last_change_time: datetime + whiteboard: str + webcompat_priority: Optional[str] + webcompat_score: Optional[int] + + @property + def parsed_user_story(self) -> Mapping[str, Any]: + return parse_user_story(self.user_story) + + @property + def resolved(self) -> Optional[datetime]: + if self.status in {"RESOLVED", "VERIFIED"} and self.last_resolved: + return self.last_resolved + return None + + @classmethod + def from_bugzilla(cls, bug: bugdantic.bugzilla.Bug) -> Self: + assert bug.id is not None + assert bug.summary is not None + assert bug.status is not None + assert bug.resolution is not None + assert bug.product is not None + assert bug.component is not None + assert bug.creator is not None + assert bug.see_also is not None + assert bug.depends_on is not None + assert bug.blocks is not None + assert bug.priority is not None + assert bug.severity is not None + assert bug.creation_time is not None + assert bug.assigned_to is not None + assert bug.keywords is not None + assert bug.url is not None + assert bug.last_change_time is not None + assert bug.whiteboard is not None + assert bug.cf_user_story is not None + + return cls( + id=bug.id, + summary=bug.summary, + status=bug.status, + resolution=bug.resolution, + product=bug.product, + component=bug.component, + see_also=bug.see_also, + depends_on=bug.depends_on, + blocks=bug.blocks, + priority=extract_int_from_field( + bug.priority, + value_map={ + "--": None, + }, + ), + severity=extract_int_from_field( + bug.severity, + value_map={ + "n/a": None, + "--": None, + "blocker": 1, + "critical": 1, + "major": 2, + "normal": 3, + "minor": 4, + "trivial": 4, + "enhancement": 4, + }, + ), + creation_time=bug.creation_time, + assigned_to=bug.assigned_to + if bug.assigned_to != "nobody@mozilla.org" + else None, + keywords=bug.keywords, + url=bug.url, + user_story=bug.cf_user_story, + last_resolved=bug.cf_last_resolved, + last_change_time=bug.last_change_time, + whiteboard=bug.whiteboard, + creator=bug.creator, + webcompat_priority=( + bug.cf_webcompat_priority + if bug.cf_webcompat_priority != "---" + else None + ), + webcompat_score=extract_int_from_field( + bug.cf_webcompat_score, + value_map={ + "---": None, + "?": None, + }, + ), + ) + + def to_json(self) -> Mapping[str, Any]: + fields = {**vars(self)} + for key in fields: 
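
`Bug.from_bugzilla` funnels `priority`, `severity` and `cf_webcompat_score` through `extract_int_from_field`, so the normalization is easy to check in isolation; a few illustrative calls matching the `value_map`s above:

```python
from webcompat_kb.bugzilla import extract_int_from_field

assert extract_int_from_field("P1") == 1                              # digits extracted
assert extract_int_from_field("minor", value_map={"minor": 4}) == 4   # named value mapped
assert extract_int_from_field("--", value_map={"--": None}) is None   # explicit "unset"
```
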
+ if isinstance(fields[key], datetime): + fields[key] = fields[key].isoformat() + return fields + + @classmethod + def from_json(cls, bug_data: Mapping[str, Any]) -> Self: + return cls( + id=bug_data["id"], + summary=bug_data["summary"], + status=bug_data["status"], + resolution=bug_data["resolution"], + product=bug_data["product"], + component=bug_data["component"], + see_also=bug_data["see_also"], + depends_on=bug_data["depends_on"], + blocks=bug_data["blocks"], + priority=bug_data["priority"], + severity=bug_data["severity"], + creation_time=datetime.fromisoformat(bug_data["creation_time"]), + assigned_to=bug_data["assigned_to"], + keywords=bug_data["keywords"], + url=bug_data["url"], + user_story=bug_data["user_story"], + last_resolved=datetime.fromisoformat(bug_data["last_resolved"]) + if bug_data["last_resolved"] is not None + else None, + last_change_time=datetime.fromisoformat(bug_data["last_change_time"]), + whiteboard=bug_data["whiteboard"], + creator=bug_data["creator"], + webcompat_priority=bug_data["webcompat_priority"], + webcompat_score=bug_data["webcompat_score"], + ) + + +@dataclass(frozen=True) class BugHistoryChange: field_name: str added: str removed: str -@dataclass +@dataclass(frozen=True) class BugHistoryEntry: number: int who: str @@ -45,7 +194,8 @@ class BugHistoryEntry: changes: list[BugHistoryChange] -class HistoryRow(NamedTuple): +@dataclass(frozen=True) +class HistoryChange: number: int who: str change_time: datetime @@ -54,29 +204,43 @@ class HistoryRow(NamedTuple): removed: str -class BugFetchError(Exception): - pass +class PropertyChange(enum.StrEnum): + added = "added" + removed = "removed" -BUGZILLA_URL = "https://bugzilla.mozilla.org/" - -OTHER_BROWSER = ["bugs.chromium.org", "bugs.webkit.org", "crbug.com"] -STANDARDS_ISSUES = ["github.com/w3c", "github.com/whatwg", "github.com/wicg"] -STANDARDS_POSITIONS = ["standards-positions"] -INTERVENTIONS = ["github.com/mozilla-extensions/webcompat-addon"] -FIELD_MAP = { - "blocker": 1, - "critical": 1, - "major": 2, - "normal": 3, - "minor": 4, - "trivial": 4, - "enhancement": 4, - "n/a": None, - "--": None, -} +@dataclass(frozen=True) +class PropertyHistoryItem: + change_time: datetime + change: PropertyChange + + +class PropertyHistory: + """Representation of the history of a specific boolean property + (i.e. 
one that can be present or not)"""
+
+    def __init__(self) -> None:
+        self.data: list[PropertyHistoryItem] = []
+
+    def __len__(self) -> int:
+        return len(self.data)
+
+    def add(self, change_time: datetime, change: PropertyChange) -> None:
+        self.data.append(PropertyHistoryItem(change_time=change_time, change=change))
 
-FILTER_CONFIG = {
+    def missing_initial_add(self) -> bool:
+        """Check whether the property's initial addition is missing from the
+        recorded history, i.e. the earliest recorded change is a removal, or
+        there are no recorded changes at all"""
+        self.data.sort(key=lambda x: x.change_time)
+        return len(self.data) == 0 or self.data[0].change == PropertyChange.removed
+
+
+BugId = int
+BugsById = Mapping[BugId, Bug]
+MutBugsById = MutableMapping[BugId, Bug]
+
+HistoryByBug = Mapping[BugId, Sequence[BugHistoryEntry]]
+
+BUG_QUERIES: Mapping[str, dict[str, str]] = {
     "site_reports_wc": {
         "product": "Web Compatibility",
         "component": "Site Reports",
@@ -147,91 +311,84 @@ class BugFetchError(Exception):
     },
 }
 
-RELATION_CONFIG = {
-    "core_bugs": {
-        "fields": [
-            {"name": "knowledge_base_bug", "type": "INTEGER", "mode": "REQUIRED"},
-            {"name": "core_bug", "type": "INTEGER", "mode": "REQUIRED"},
-        ],
-        "source": "depends_on",
-        "store_id": "core",
-    },
-    "breakage_reports": {
-        "fields": [
-            {"name": "knowledge_base_bug", "type": "INTEGER", "mode": "REQUIRED"},
-            {"name": "breakage_bug", "type": "INTEGER", "mode": "REQUIRED"},
-        ],
-        "source": "blocks",
-        "store_id": "breakage",
-    },
-    "interventions": {
-        "fields": [
-            {"name": "knowledge_base_bug", "type": "INTEGER", "mode": "REQUIRED"},
-            {"name": "code_url", "type": "STRING", "mode": "REQUIRED"},
-        ],
-        "source": "see_also",
-        "condition": INTERVENTIONS,
-    },
-    "other_browser_issues": {
-        "fields": [
-            {"name": "knowledge_base_bug", "type": "INTEGER", "mode": "REQUIRED"},
-            {"name": "issue_url", "type": "STRING", "mode": "REQUIRED"},
-        ],
-        "source": "see_also",
-        "condition": OTHER_BROWSER,
-    },
-    "standards_issues": {
-        "fields": [
-            {"name": "knowledge_base_bug", "type": "INTEGER", "mode": "REQUIRED"},
-            {"name": "issue_url", "type": "STRING", "mode": "REQUIRED"},
-        ],
-        "source": "see_also",
-        "condition": STANDARDS_ISSUES,
-    },
-    "standards_positions": {
-        "fields": [
-            {"name": "knowledge_base_bug", "type": "INTEGER", "mode": "REQUIRED"},
-            {"name": "discussion_url", "type": "STRING", "mode": "REQUIRED"},
-        ],
-        "source": "see_also",
-        "condition": STANDARDS_POSITIONS,
-    },
-}
-ETP_RELATION_CONFIG = {
-    "etp_breakage_reports": {
-        "fields": [
-            {"name": "breakage_bug", "type": "INTEGER", "mode": "REQUIRED"},
-            {"name": "etp_meta_bug", "type": "INTEGER", "mode": "REQUIRED"},
-        ],
-        "source": "depends_on",
-        "store_id": "breakage",
-    },
-}
+@dataclass
+class BugLinkConfig:
+    table_name: str
+    from_field_name: str
+    to_field_name: str
+
-LINK_FIELDS = ["other_browser_issues", "standards_issues", "standards_positions"]
-PLATFORM_RELATION_CONFIG = {key: RELATION_CONFIG[key] for key in LINK_FIELDS}
+@dataclass
+class ExternalLinkConfig:
+    table_name: str
+    field_name: str
+    match_substrs: list[str]
+
+    def get_links(
+        self, all_bugs: BugsById, kb_bugs: set[BugId]
+    ) -> Mapping[BugId, set[str]]:
+        rv: defaultdict[int, set[str]] = defaultdict(set)
+        for kb_bug_id in kb_bugs:
+            bug_ids = [kb_bug_id] + all_bugs[kb_bug_id].depends_on
+            for bug_id in bug_ids:
+                if bug_id not in all_bugs:
+                    continue
+                bug = all_bugs[bug_id]
+                for entry in bug.see_also:
+                    if any(substr in entry for substr in self.match_substrs):
+                        rv[kb_bug_id].add(entry)
+        return rv
+
+
+EXTERNAL_LINK_CONFIGS = {
+    config.table_name: config
+    for config in [
+        ExternalLinkConfig(
+            "interventions",
+
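
`missing_initial_add` is what later lets the importer synthesize a creation-time keywords entry: if the earliest recorded event for a keyword is a removal, the addition must predate the recorded history. A small illustration:

```python
from datetime import datetime, timezone

from webcompat_kb.bugzilla import PropertyChange, PropertyHistory

history = PropertyHistory()
# The first recorded event removes the property, so the original addition
# must have happened before the history starts (e.g. when the bug was filed).
history.add(datetime(2024, 6, 1, tzinfo=timezone.utc), PropertyChange.removed)
history.add(datetime(2024, 7, 1, tzinfo=timezone.utc), PropertyChange.added)
assert history.missing_initial_add()

assert PropertyHistory().missing_initial_add()  # no recorded history counts too
```
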
"code_url", + ["github.com/mozilla-extensions/webcompat-addon"], + ), + ExternalLinkConfig( + "other_browser_issues", + "issue_url", + ["bugs.chromium.org", "bugs.webkit.org", "crbug.com"], + ), + ExternalLinkConfig( + "standards_issues", + "issue_url", + ["github.com/w3c", "github.com/whatwg", "github.com/wicg"], + ), + ExternalLinkConfig( + "standards_positions", "discussion_url", ["standards-positions"] + ), + ] +} -def extract_int_from_field(field: Optional[str]) -> Optional[int]: - if field: - if field.lower() in FIELD_MAP: - return FIELD_MAP[field.lower()] +def extract_int_from_field( + field_value: Optional[str], value_map: Optional[Mapping[str, Optional[int]]] = None +) -> Optional[int]: + if field_value: + if value_map and field_value.lower() in value_map: + return value_map[field_value.lower()] - match = re.search(r"\d+", field) + match = re.search(r"\d+", field_value) if match: return int(match.group()) - + logging.warning( + f"Unexpected field value '{field_value}', could not convert to integer" + ) return None -def parse_string_to_json(input_string: str) -> Union[str, Mapping[str, Any]]: +def parse_user_story(input_string: str) -> Mapping[str, str | list[str]]: if not input_string: - return "" + return {} lines = input_string.splitlines() - result_dict: dict[str, Any] = {} + result_dict: dict[str, str | list[str]] = {} for line in lines: if line: @@ -239,39 +396,34 @@ def parse_string_to_json(input_string: str) -> Union[str, Mapping[str, Any]]: if len(key_value) == 2: key, value = key_value if key in result_dict: - if isinstance(result_dict[key], list): - result_dict[key].append(value) + current_value = result_dict[key] + if isinstance(current_value, list): + current_value.append(value) else: - result_dict[key] = [result_dict[key], value] + result_dict[key] = [current_value, value] else: result_dict[key] = value if not result_dict: - return "" + return {} return result_dict -class BugzillaToBigQuery: - def __init__( - self, - client: bigquery.Client, - bq_dataset_id: str, - bugzilla_api_key: Optional[str], - write: bool, - include_history: bool, - recreate_history: bool, - ): - bz_config = bugdantic.BugzillaConfig( - BUGZILLA_URL, bugzilla_api_key, allow_writes=write - ) - self.bz_client = bugdantic.Bugzilla(bz_config) - self.client = client - self.bq_dataset_id = bq_dataset_id - self.write = write - self.include_history = include_history - self.recreate_history = recreate_history +class BugCache(Mapping): + def __init__(self, bz_client: bugdantic.Bugzilla): + self.bz_client = bz_client + self.bugs: MutBugsById = {} + + def __getitem__(self, key: BugId) -> Bug: + return self.bugs[key] + + def __len__(self) -> int: + return len(self.bugs) - def fetch_bugs(self, params: dict[str, str]) -> tuple[bool, MutBugsById]: + def __iter__(self) -> Iterator[BugId]: + yield from self.bugs + + def bz_fetch_bugs(self, params: dict[str, str]) -> None: fields = [ "id", "summary", @@ -293,504 +445,276 @@ def fetch_bugs(self, params: dict[str, str]) -> tuple[bool, MutBugsById]: "cf_last_resolved", "last_change_time", "whiteboard", - "creator", "cf_webcompat_priority", "cf_webcompat_score", ] try: - bugs = self.bz_client.search(query=params, include_fields=fields) - data: MutBugsById = {} + bugs = self.bz_client.search( + query=params, include_fields=fields, page_size=200 + ) for bug in bugs: assert bug.id is not None - data[bug.id] = bug.to_dict() - fetch_completed = True + self.bugs[bug.id] = Bug.from_bugzilla(bug) except Exception as e: logging.error(f"Error: {e}") - fetch_completed = False - 
data = {} - - return fetch_completed, data - - def kb_bugs_from_platform_bugs( - self, - platform_bugs: BugsById, - kb_ids: set[int], - site_report_ids: set[int], - ) -> BugsById: - """Get a list of platform bugs that should also be considered knowledge base bugs - - These are platform bugs that aren't blocking an existing kb entry - """ - - filtered = {} - - for bug_id, source_bug in platform_bugs.items(): - # Check if the platform bug already has a kb entry and skip if so - if any(blocked_id in kb_ids for blocked_id in source_bug["blocks"]): - continue - - bug = {**source_bug} - - # Only store a breakage bug as it's the relation we care about - bug["blocks"] = [ - blocked_id - for blocked_id in bug["blocks"] - if blocked_id in site_report_ids - ] - - # Don't store bugs that platform bug depends on - bug["depends_on"] = [] - - filtered[bug_id] = bug - - return filtered - - def unify_etp_dependencies( - self, - etp_reports: BugsById, - etp_dependencies: BugsById, - ) -> BugsById: - """Unify blocked and depends_on for each ETP bug as their dependencies are inconsistent, - keep only ETP meta bugs and store them only in depends_on field.""" - - filtered = {} - - for bug_id, source_bug in etp_reports.items(): - bug = {**source_bug} - - blocks = [ - blocked_id - for blocked_id in bug["blocks"] - if blocked_id in etp_dependencies - and "meta" in etp_dependencies[blocked_id]["keywords"] - ] - - depends = [ - depends_id - for depends_id in bug["depends_on"] - if depends_id in etp_dependencies - and "meta" in etp_dependencies[depends_id]["keywords"] - ] - - bug["depends_on"] = blocks + depends - bug["blocks"] = [] - - filtered[bug_id] = bug - - return filtered - - def chunked_list(self, data: list[int], size: int) -> Iterator[list[int]]: - for i in range(0, len(data), size): - yield data[i : i + size] - - def fetch_by_id(self, bug_ids: set[int]) -> tuple[bool, MutBugsById]: - chunk_size = 400 - all_bugs: dict[int, Bug] = {} - all_completed = True + raise - for chunk in self.chunked_list(list(bug_ids), chunk_size): - logging.info(f"Fetching {len(chunk)} bugs") - - completed, bugs = self.fetch_bugs({"id": ",".join(map(str, chunk))}) - if completed: - all_bugs.update(bugs) - else: - all_completed = False - break - - return all_completed, all_bugs - - def fetch_related_bugs( - self, bugs: BugsById, relations: list[str], all_bugs: MutBugsById - ) -> tuple[bool, MutBugsById]: - related_ids: set[int] = set() - related_bugs: dict[int, Bug] = {} + def fetch_missing_relations(self, bugs: BugsById, relation: str) -> int: + related_ids: set[str] = set() for bug in bugs.values(): - for relation_property in relations: - related_ids |= set( - bug_id - for bug_id in bug[relation_property] - if bug_id not in all_bugs - ) - related_bugs.update( - { - bug_id: all_bugs[bug_id] - for bug_id in bug[relation_property] - if bug_id in all_bugs - } - ) - - completed, fetched_bugs = self.fetch_by_id(related_ids) - all_bugs.update(fetched_bugs) - related_bugs.update(fetched_bugs) - return completed, related_bugs + related_ids |= { + str(bug_id) for bug_id in getattr(bug, relation) if bug_id not in self + } - def filter_core_bugs(self, bugs: BugsById) -> BugsById: - return { - bug_id: bug - for bug_id, bug in bugs.items() - if bug["product"] != "Web Compatibility" - } + if related_ids: + self.bz_fetch_bugs({"id": ",".join(related_ids)}) + return len(related_ids) - def fetch_all_bugs( - self, - ) -> Optional[ - tuple[ - MutBugsById, MutBugsById, MutBugsById, MutBugsById, MutBugsById, MutBugsById - ] - ]: - """Get all the bugs 
that should be imported into BigQuery.
-
-        :returns: A tuple of (all bugs, site report bugs, knowledge base bugs,
-        core bugs, ETP report bugs, ETP dependencies)."""
-        fetched_bugs = {}
-        all_bugs: dict[int, Bug] = {}
-
-        for category, filter_config in FILTER_CONFIG.items():
-            logging.info(f"Fetching {category} bugs")
-            completed, fetched_bugs[category] = self.fetch_bugs(filter_config)
-            all_bugs.update(fetched_bugs[category])
-            if not completed:
-                return None
-
-        site_reports = fetched_bugs["site_reports_wc"]
-        site_reports.update(fetched_bugs["site_reports_other"])
-
-        logging.info("Fetching site-report blocking bugs")
-        completed_site_report_deps, site_reports_deps = self.fetch_related_bugs(
-            site_reports, ["depends_on"], all_bugs
-        )
+    def into_mapping(self) -> BugsById:
+        """Convert the data into a plain dict.
 
-        if not completed_site_report_deps:
-            logging.error("Failed to fetch site report blocking bugs")
-            return None
+        Also reset this object, so state isn't shared between multiple places
+        """
+        bugs = self.bugs
+        self.bugs = {}
+        return bugs
+
+
+def is_site_report(bug: Bug) -> bool:
+    return (bug.product == "Web Compatibility" and bug.component == "Site Reports") or (
+        bug.product != "Web Compatibility" and "webcompat:site-report" in bug.keywords
+    )
+
+
+def is_etp_report(bug: Bug) -> bool:
+    return (
+        bug.product == "Web Compatibility" and bug.component == "Privacy: Site Reports"
+    )
+
+
+def is_kb_entry(bug: Bug) -> bool:
+    """Check if a bug is directly in the knowledge base.
+
+    This doesn't include core bugs that are treated as part of the knowledge base
+    because they directly block a site report."""
+    return bug.product == "Web Compatibility" and bug.component == "Knowledge Base"
+
+
+def is_webcompat_platform_bug(bug: Bug) -> bool:
+    """Check if a bug is a platform bug.
+
+    These are only actually in the kb if they also block a site report."""
+    return (
+        bug.product != "Web Compatibility" and "webcompat:platform-bug" in bug.keywords
+    )
+
+
+def get_kb_bug_core_bugs(
+    all_bugs: BugsById, kb_bugs: set[BugId], platform_bugs: set[BugId]
+) -> Mapping[BugId, set[BugId]]:
+    rv = defaultdict(set)
+    for kb_id in kb_bugs:
+        if kb_id not in platform_bugs:
+            for bug_id in all_bugs[kb_id].depends_on:
+                if bug_id in platform_bugs:
+                    rv[kb_id].add(bug_id)
+    return rv
+
+
+def get_kb_bug_site_report(
+    all_bugs: BugsById, kb_bugs: set[BugId], site_report_bugs: set[BugId]
+) -> Mapping[BugId, set[BugId]]:
+    rv = defaultdict(set)
+    for kb_id in kb_bugs:
+        if kb_id in site_report_bugs:
+            rv[kb_id].add(kb_id)
+        for bug_id in all_bugs[kb_id].blocks:
+            if bug_id in site_report_bugs:
+                rv[kb_id].add(bug_id)
+    return rv
+
+
+def get_etp_breakage_reports(
+    all_bugs: BugsById, etp_reports: set[BugId]
+) -> Mapping[BugId, set[BugId]]:
+    rv = {}
+    for bug_id in etp_reports:
+        report_bug = all_bugs[bug_id]
+        meta_bugs = {
+            meta_id
+            for meta_id in report_bug.depends_on + report_bug.blocks
+            if "meta" in all_bugs[meta_id].keywords
+        }
+        if meta_bugs:
+            rv[bug_id] = meta_bugs
+    return rv
 
-        kb_bugs = fetched_bugs["knowledge_base"]
-
-        logging.info("Fetching blocking bugs for KB bugs")
-        completed_platform_bugs, kb_deps = self.fetch_related_bugs(
-            kb_bugs, ["depends_on"], all_bugs
-        )
+def fetch_all_bugs(
+    bz_client: bugdantic.Bugzilla,
+) -> BugsById:
+    """Get all the bugs that should be imported into BigQuery.
-        if not completed:
-            logging.error("Failed to fetch blocking bugs")
-            return None
+    :returns: A mapping from bug id to Bug for every bug that should be
+    imported, including fetched dependencies."""
 
-        platform_bugs = fetched_bugs["platform_bugs"]
-        platform_bugs.update(self.filter_core_bugs(site_reports_deps))
-        platform_bugs.update(self.filter_core_bugs(kb_deps))
+    bug_cache = BugCache(bz_client)
 
-        etp_reports = fetched_bugs["site_reports_etp"]
+    for category, filter_config in BUG_QUERIES.items():
+        logging.info(f"Fetching {category} bugs")
+        bug_cache.bz_fetch_bugs(filter_config)
 
-        etp_completed, etp_dependencies = self.fetch_related_bugs(
-            etp_reports, ["depends_on", "blocks"], all_bugs
+    fetch_count = -1
+    while fetch_count != 0:
+        # Get all blocking bugs for site reports or kb entries or etp site reports
+        # This can take more than one iteration if dependencies themselves turn out
+        # to be site reports that were excluded by the date cutoff
+        fetch_count = bug_cache.fetch_missing_relations(
+            {
+                bug_id: bug
+                for bug_id, bug in bug_cache.items()
+                if is_site_report(bug) or is_kb_entry(bug) or is_etp_report(bug)
+            },
+            "depends_on",
        )
-
-        if not etp_completed:
-            logging.error("Failed to fetch etp blocking bugs")
-            return None
-
-        return (
-            all_bugs,
-            site_reports,
-            kb_bugs,
-            platform_bugs,
-            etp_reports,
-            etp_dependencies,
+        fetch_count += bug_cache.fetch_missing_relations(
+            {bug_id: bug for bug_id, bug in bug_cache.items() if is_etp_report(bug)},
+            "blocks",
         )
+        if fetch_count:
+            logging.info(f"Fetched {fetch_count} related bugs")
 
-    def process_relations(
-        self, bugs: BugsById, relation_config: RelationConfig
-    ) -> tuple[Mapping[int, Mapping[str, list[int | str]]], Mapping[str, set[int]]]:
-        """Build relationship tables based on information in the bugs.
-
-        :returns: A mapping {bug_id: {relationship name: [related items]}} and
-        a mapping {store id: {bug ids}}
-        """
-        # The types here are wrong; the return values are lists of ints or lists of strings but not both.
-        # However enforcing that property is hard without building specific types for the two cases
-        relations: dict[int, dict[str, list[int | str]]] = {}
-        related_bug_ids: dict[str, set[int]] = {}
-
-        for config in relation_config.values():
-            if "store_id" in config:
-                related_bug_ids[config["store_id"]] = set()
-
-        for bug_id, bug in bugs.items():
-            relations[bug_id] = {rel: [] for rel in relation_config.keys()}
+    return bug_cache.into_mapping()
 
-            for rel, config in relation_config.items():
-                related_items = bug[config["source"]]
-                for item in related_items:
-                    if "condition" in config and not any(
-                        c in item for c in config["condition"]
-                    ):
-                        continue
-
-                    relations[bug_id][rel].append(item)
-
-                    if config.get("store_id"):
-                        assert isinstance(item, int)
-                        related_bug_ids[config["store_id"]].add(item)
-
-        return relations, related_bug_ids
-
-    def add_kb_entry_breakage(
+class BugHistoryUpdater:
+    def __init__(
         self,
-        kb_data: Mapping[int, Mapping[str, list[int | str]]],
-        kb_dep_ids: Mapping[str, set[int]],
-        site_reports: BugsById,
-    ) -> None:
-        """Add breakage relations for bugs that are both kb entries and also site reports
-
-        If a core bug has the webcompat:platform-bug keyword it's a kb entry.
-        If it also has the webcompat:site-report keyword it's a site report.
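
The `while` loop in the new `fetch_all_bugs` keeps pulling in `depends_on`/`blocks` targets until a pass fetches nothing new. A stripped-down sketch of the same fixpoint pattern, with a plain dict standing in for `BugCache` and a lambda standing in for Bugzilla:

```python
def fetch_closure(known, fetch_missing):
    """Toy fixpoint: fetch referenced-but-unknown ids until none remain.

    `known` maps id -> list of referenced ids; `fetch_missing` plays the
    role of Bugzilla and returns the same shape for ids not seen yet.
    """
    while True:
        missing = {ref for refs in known.values() for ref in refs} - known.keys()
        if not missing:
            return known
        known.update(fetch_missing(missing))


# 1 -> 2 -> 3: the second pass discovers 3 via the newly fetched 2.
store = {1: [2], 2: [3], 3: []}
graph = fetch_closure({1: [2]}, lambda ids: {i: store[i] for i in ids})
assert set(graph) == {1, 2, 3}
```
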
- In this case we want the bug to reference itself in the breakage_reports table.""" - for bug_id in set(kb_data.keys()) & set(site_reports.keys()): - if bug_id not in kb_dep_ids["breakage"]: - kb_data[bug_id]["breakage_reports"].append(bug_id) - kb_dep_ids["breakage"].add(bug_id) - - def fetch_missing_deps( - self, all_bugs: BugsById, kb_dep_ids: Mapping[str, set[int]] - ) -> Optional[tuple[BugsById, BugsById]]: - dep_ids = {item for sublist in kb_dep_ids.values() for item in sublist} - - # Check for missing bugs - missing_ids = dep_ids - set(all_bugs.keys()) - - if missing_ids: - logging.info( - "Fetching missing core bugs and breakage reports from Bugzilla" - ) - completed, missing_bugs = self.fetch_bugs( - {"id": ",".join(map(str, missing_ids))} - ) - if not completed: - return None + bq_client: bigquery.Client, + bq_dataset_id: str, + bz_client: bugdantic.Bugzilla, + ): + self.bq_client = bq_client + self.bq_dataset_id = bq_dataset_id + self.bz_client = bz_client - # Separate core bugs for updating relations. - core_dependenies = set(kb_dep_ids.get("core", set())) - core_missing = { - bug_id: bug - for bug_id, bug in missing_bugs.items() - if bug_id in core_dependenies - } + def run(self, all_bugs: BugsById, recreate: bool) -> HistoryByBug: + if not recreate: + existing_records = self.bigquery_fetch_history(all_bugs.keys()) + new_bugs, existing_bugs = self.group_bugs(all_bugs) + existing_bugs_history = self.missing_records( + existing_records, self.existing_bugs_history(existing_bugs) + ) else: - missing_bugs, core_missing = {}, {} - - return missing_bugs, core_missing + existing_records = {} + new_bugs = all_bugs + existing_bugs_history = {} - def add_links( - self, - kb_processed: Mapping[int, Mapping[str, list[int | str]]], - dep_processed: Mapping[int, Mapping[str, list[int | str]]], - ) -> Mapping[int, Mapping[str, list[int | str]]]: - """Create links between kb entries and external data such - as standards issues.""" - result = {**kb_processed} - - for kb_bug_id in result: - for core_bug_id in result[kb_bug_id]["core_bugs"]: - assert isinstance(core_bug_id, int) - for sub_key in LINK_FIELDS: - if sub_key in result[kb_bug_id] and sub_key in dep_processed.get( - core_bug_id, {} - ): - for link_item in dep_processed[core_bug_id][sub_key]: - if link_item not in result[kb_bug_id][sub_key]: - result[kb_bug_id][sub_key].append(link_item) + new_bugs_history = self.missing_records( + existing_records, self.new_bugs_history(new_bugs) + ) - return result + if not (new_bugs_history or existing_bugs_history): + logging.info("No relevant history updates") + return {} - def build_relations( - self, bugs: BugsById, relation_config: RelationConfig - ) -> Relations: - relations: dict[str, list[Mapping[str, Any]]] = { - key: [] for key in relation_config.keys() - } + return self.merge_history(existing_bugs_history, new_bugs_history) - for bug_id, bug in bugs.items(): - for field_key, items in bug.items(): - fields = relation_config[field_key]["fields"] + def group_bugs(self, all_bugs: BugsById) -> tuple[BugsById, BugsById]: + all_ids = set(all_bugs.keys()) + existing_ids = self.bigquery_fetch_imported_ids() + new_ids = all_ids - existing_ids - for row in items: - relation_row = {fields[0]["name"]: bug_id, fields[1]["name"]: row} - relations[field_key].append(relation_row) + new_bugs = { + bug_id: bug for bug_id, bug in all_bugs.items() if bug_id in new_ids + } + existing_bugs = { + bug_id: bug for bug_id, bug in all_bugs.items() if bug_id not in new_ids + } + return new_bugs, existing_bugs - 
return relations + def merge_history(self, *sources: HistoryByBug) -> HistoryByBug: + history: defaultdict[BugId, list[BugHistoryEntry]] = defaultdict(list) + for source in sources: + for bug_id, changes in source.items(): + history[bug_id].extend(changes) + return history - def convert_bug_data(self, bug: Bug) -> dict[str, Any]: - resolved = None - if bug["status"] in ["RESOLVED", "VERIFIED"] and bug["cf_last_resolved"]: - resolved = bug["cf_last_resolved"] + def new_bugs_history(self, new_bugs: BugsById) -> HistoryByBug: + history = self.bugzilla_fetch_history(new_bugs.keys()) + synthetic_history = self.create_initial_history_entry(new_bugs, history) + return self.merge_history(history, synthetic_history) - user_story = parse_string_to_json(bug["cf_user_story"]) + def existing_bugs_history(self, existing_bugs: BugsById) -> HistoryByBug: + last_import_time = self.bigquery_last_import() - assigned_to = ( - bug["assigned_to"] if bug["assigned_to"] != "nobody@mozilla.org" else None - ) - webcompat_priority = ( - bug.get("cf_webcompat_priority") - if bug.get("cf_webcompat_priority") != "---" - else None - ) + if last_import_time is None: + logging.info("No previous history update found") + return {} - return { - "number": bug["id"], - "title": bug["summary"], - "status": bug["status"], - "resolution": bug["resolution"], - "product": bug["product"], - "component": bug["component"], - "creator": bug["creator"], - "severity": extract_int_from_field(bug["severity"]), - "priority": extract_int_from_field(bug["priority"]), - "creation_time": bug["creation_time"].isoformat(), - "assigned_to": assigned_to, - "keywords": bug["keywords"], - "url": bug["url"], - "user_story": user_story, - "user_story_raw": bug["cf_user_story"], - "resolved_time": resolved.isoformat() if resolved is not None else None, - "whiteboard": bug["whiteboard"], - "webcompat_priority": webcompat_priority, - "webcompat_score": extract_int_from_field(bug.get("cf_webcompat_score")), + updated_bugs = { + bug_id + for bug_id, bug in existing_bugs.items() + if bug.last_change_time > last_import_time } - def update_bugs(self, bugs: BugsById) -> None: - res = [self.convert_bug_data(bug) for bug in bugs.values()] - - job_config = bigquery.LoadJobConfig( - source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON, - schema=[ - bigquery.SchemaField("number", "INTEGER", mode="REQUIRED"), - bigquery.SchemaField("title", "STRING", mode="REQUIRED"), - bigquery.SchemaField("status", "STRING", mode="REQUIRED"), - bigquery.SchemaField("resolution", "STRING", mode="REQUIRED"), - bigquery.SchemaField("product", "STRING", mode="REQUIRED"), - bigquery.SchemaField("component", "STRING", mode="REQUIRED"), - bigquery.SchemaField("creator", "STRING", mode="REQUIRED"), - bigquery.SchemaField("severity", "INTEGER"), - bigquery.SchemaField("priority", "INTEGER"), - bigquery.SchemaField("creation_time", "TIMESTAMP", mode="REQUIRED"), - bigquery.SchemaField("assigned_to", "STRING"), - bigquery.SchemaField("keywords", "STRING", mode="REPEATED"), - bigquery.SchemaField("url", "STRING"), - bigquery.SchemaField("user_story", "JSON"), - bigquery.SchemaField("user_story_raw", "STRING"), - bigquery.SchemaField("resolved_time", "TIMESTAMP"), - bigquery.SchemaField("whiteboard", "STRING"), - bigquery.SchemaField("webcompat_priority", "STRING"), - bigquery.SchemaField("webcompat_score", "INTEGER"), - ], - write_disposition="WRITE_TRUNCATE", - ) - - bugs_table = f"{self.bq_dataset_id}.bugzilla_bugs" + if not updated_bugs: + logging.info(f"No updated bugs since 
{last_import_time.isoformat()}") + return {} - job = self.client.load_table_from_json( - res, - bugs_table, - job_config=job_config, + logging.info( + f"Fetching bugs {updated_bugs} updated since {last_import_time.isoformat()}" ) - logging.info("Writing to `bugzilla_bugs` table") - - try: - job.result() - except Exception as e: - print(f"ERROR: {e}") - if job.errors: - for error in job.errors: - logging.error(error) - - table = self.client.get_table(bugs_table) - logging.info(f"Loaded {table.num_rows} rows into {table}") - - def update_kb_ids(self, ids: Iterable[int]) -> None: - res = [{"number": kb_id} for kb_id in ids] - - job_config = bigquery.LoadJobConfig( - source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON, - schema=[ - bigquery.SchemaField("number", "INTEGER", mode="REQUIRED"), - ], - write_disposition="WRITE_TRUNCATE", - ) - - kb_bugs_table = f"{self.bq_dataset_id}.kb_bugs" - - job = self.client.load_table_from_json( - res, - kb_bugs_table, - job_config=job_config, - ) + bugs_full_history = self.bugzilla_fetch_history(updated_bugs) + # Filter down to only recent updates, since we always get the full history + bugs_history = {} + for bug_id, bug_full_history in bugs_full_history.items(): + bug_history = [ + item for item in bug_full_history if item.change_time > last_import_time + ] + if bug_history: + bugs_history[bug_id] = bug_history - logging.info("Writing to `kb_bugs` table") + return bugs_history - try: - job.result() - except Exception as e: - print(f"ERROR: {e}") - if job.errors: - for error in job.errors: - logging.error(error) + def missing_records( + self, existing_records: HistoryByBug, updates: HistoryByBug + ) -> HistoryByBug: + if not existing_records: + return updates - table = self.client.get_table(kb_bugs_table) - logging.info(f"Loaded {table.num_rows} rows into {table}") + existing_history = set(self.flatten_history(existing_records)) + new_history = set(self.flatten_history(updates)) - def update_relations( - self, relations: Relations, relation_config: RelationConfig - ) -> None: - for key, value in relations.items(): - if value: - job_config = bigquery.LoadJobConfig( - source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON, - schema=[ - bigquery.SchemaField( - item["name"], item["type"], mode=item["mode"] - ) - for item in relation_config[key]["fields"] - ], - write_disposition="WRITE_TRUNCATE", - ) + diff = new_history - existing_history - relation_table = f"{self.bq_dataset_id}.{key}" - job = self.client.load_table_from_json( - cast(Iterable[dict[str, Any]], value), - relation_table, - job_config=job_config, - ) + return self.unflatten_history(item for item in new_history if item in diff) - logging.info(f"Writing to `{relation_table}` table") + def bigquery_fetch_imported_ids(self) -> set[int]: + query = f""" + SELECT number + FROM `{self.bq_dataset_id}.bugzilla_bugs` + """ + res = self.bq_client.query(query).result() + rows = list(res) - try: - job.result() - except Exception as e: - print(f"ERROR: {e}") - if job.errors: - for error in job.errors: - logging.error(error) + imported_ids = {bug["number"] for bug in rows} - table = self.client.get_table(relation_table) - logging.info(f"Loaded {table.num_rows} rows into {table}") + return imported_ids - def get_last_import_datetime(self) -> Optional[datetime]: + def bigquery_last_import(self) -> Optional[datetime]: query = f""" SELECT MAX(run_at) AS last_run_at FROM `{self.bq_dataset_id}.import_runs` WHERE is_history_fetch_completed = TRUE """ - res = self.client.query(query).result() + res = 
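
Because `HistoryChange` is a frozen, all-scalar dataclass, it is hashable, which is what lets `missing_records` diff stored and freshly fetched history as flat sets before unflattening the remainder. Conceptually:

```python
from datetime import datetime, timezone

from webcompat_kb.bugzilla import HistoryChange

when = datetime(2024, 1, 1, tzinfo=timezone.utc)
stored = {HistoryChange(1, "a@example.org", when, "keywords", "x", "")}
fetched = {
    HistoryChange(1, "a@example.org", when, "keywords", "x", ""),  # already stored
    HistoryChange(1, "a@example.org", when, "status", "NEW", ""),  # genuinely new
}
# Only the unseen change survives the set difference.
assert fetched - stored == {HistoryChange(1, "a@example.org", when, "status", "NEW", "")}
```
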
self.bq_client.query(query).result() row = list(res)[0] return row["last_run_at"] - def fetch_bugs_history( - self, ids: Iterable[int] - ) -> tuple[Mapping[int, list[bugdantic.bugzilla.History]], bool]: + def bugzilla_fetch_history(self, ids: Iterable[int]) -> HistoryByBug: history: dict[int, list[bugdantic.bugzilla.History]] = {} chunk_size = 100 ids_list = list(ids) @@ -836,161 +760,21 @@ def fetch_bugs_history( else: break - completed = len(ids_list) == 0 - return history, completed - - def serialize_history_entry(self, entry: BugHistoryEntry) -> dict[str, Any]: - return { - "number": entry.number, - "who": entry.who, - "change_time": entry.change_time.isoformat(), - "changes": [ - { - "field_name": change.field_name, - "added": change.added, - "removed": change.removed, - } - for change in entry.changes - ], - } - - def update_history( - self, records: list[BugHistoryEntry], recreate: bool = False - ) -> None: - if not records and not recreate: - logging.info("No history records to update") - return - - write_disposition = "WRITE_APPEND" if not recreate else "WRITE_TRUNCATE" - job_config = bigquery.LoadJobConfig( - source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON, - schema=[ - bigquery.SchemaField("number", "INTEGER", mode="REQUIRED"), - bigquery.SchemaField("who", "STRING", mode="REQUIRED"), - bigquery.SchemaField("change_time", "TIMESTAMP", mode="REQUIRED"), - bigquery.SchemaField( - "changes", - "RECORD", - mode="REPEATED", - fields=[ - bigquery.SchemaField("field_name", "STRING", mode="REQUIRED"), - bigquery.SchemaField("added", "STRING", mode="REQUIRED"), - bigquery.SchemaField("removed", "STRING", mode="REQUIRED"), - ], - ), - ], - write_disposition=write_disposition, - ) - - history_table = f"{self.bq_dataset_id}.bugs_history" - - job = self.client.load_table_from_json( - (self.serialize_history_entry(item) for item in records), - history_table, - job_config=job_config, - ) - - logging.info("Writing to `bugs_history` table") - - try: - job.result() - except Exception as e: - print(f"ERROR: {e}") - if job.errors: - for error in job.errors: - logging.error(error) - - table = self.client.get_table(history_table) - logging.info(f"Loaded {len(records)} rows into {table}") - - def get_existing_history_records_by_ids( - self, bug_ids: Iterable[int] - ) -> list[BugHistoryEntry]: - formatted_numbers = ", ".join(str(bug_id) for bug_id in bug_ids) - - query = f""" - SELECT * - FROM `{self.bq_dataset_id}.bugs_history` - WHERE number IN ({formatted_numbers}) - """ - result = self.client.query(query).result() - return [ - BugHistoryEntry( - row["number"], - row["who"], - row["change_time"], - changes=[ - BugHistoryChange( - change["field_name"], change["added"], change["removed"] - ) - for change in row["changes"] - ], + if len(ids_list) != 0: + raise BugLoadError( + f"Failed to fetch bug history for {','.join(str(item) for item in ids_list)}" ) - for row in result - ] - def flatten_history(self, records: Iterable[BugHistoryEntry]) -> list[HistoryRow]: - history = [] - for record in records: - for change in record.changes: - history_row = HistoryRow( - record.number, - record.who, - record.change_time, - change.field_name, - change.added, - change.removed, - ) - history.append(history_row) + return self.bugzilla_to_history_entry(history) - return history - - def unflatten_history(self, diff: Sequence[HistoryRow]) -> list[BugHistoryEntry]: - changes: dict[tuple[int, str, datetime], BugHistoryEntry] = {} - for item in diff: - key = (item.number, item.who, item.change_time) - - if key 
not in changes: - changes[key] = BugHistoryEntry( - number=item.number, - who=item.who, - change_time=item.change_time, - changes=[], - ) - changes[key].changes.append( - BugHistoryChange( - field_name=item.field_name, - added=item.added, - removed=item.removed, - ) - ) - - return list(changes.values()) - - def filter_only_unsaved_changes( - self, history_updates: list[BugHistoryEntry], bug_ids: set[int] - ) -> list[BugHistoryEntry]: - existing_records = self.get_existing_history_records_by_ids(bug_ids) - - if not existing_records: - return history_updates - - existing_history = self.flatten_history(existing_records) - new_history = self.flatten_history(history_updates) - - diff = set(new_history) - set(existing_history) - - return self.unflatten_history([item for item in new_history if item in diff]) - - def extract_history_fields( + def bugzilla_to_history_entry( self, updated_history: Mapping[int, list[bugdantic.bugzilla.History]] - ) -> tuple[list[BugHistoryEntry], set[int]]: - result = [] - bug_ids = set() + ) -> HistoryByBug: + rv: dict[int, list[BugHistoryEntry]] = defaultdict(list) for bug_id, history in updated_history.items(): - filtered_changes = [] - + # Need to ensure we have an entry for every bug even if there isn't any history + rv[bug_id] = [] for record in history: relevant_changes = [ BugHistoryChange( @@ -1010,257 +794,347 @@ def extract_history_fields( change_time=record.when, changes=relevant_changes, ) - filtered_changes.append(filtered_record) - bug_ids.add(bug_id) - - if filtered_changes: - result.extend(filtered_changes) + rv[bug_id].append(filtered_record) - return result, bug_ids + return rv - def filter_relevant_history( - self, updated_history: Mapping[int, list[bugdantic.bugzilla.History]] - ) -> list[BugHistoryEntry]: - only_unsaved_changes = [] - result, bug_ids = self.extract_history_fields(updated_history) - - if result: - only_unsaved_changes = self.filter_only_unsaved_changes(result, bug_ids) - - return only_unsaved_changes - - def get_bugs_updated_since_last_import( - self, all_bugs: BugsById, last_import_time: datetime - ) -> set[int]: - return { - bug["id"] - for bug in all_bugs.values() - if bug["last_change_time"] > last_import_time - } - - def get_imported_ids(self) -> set[int]: + def bigquery_fetch_history(self, bug_ids: Iterable[int]) -> HistoryByBug: + rv: defaultdict[int, list[BugHistoryEntry]] = defaultdict(list) + formatted_numbers = ", ".join(str(bug_id) for bug_id in bug_ids) query = f""" - SELECT number - FROM `{self.bq_dataset_id}.bugzilla_bugs` - """ - res = self.client.query(query).result() - rows = list(res) - - imported_ids = {bug["number"] for bug in rows} - - return imported_ids - - def create_keyword_map( - self, history: list[BugHistoryEntry] - ) -> Mapping[int, Mapping[str, Mapping[str, list[datetime]]]]: - keyword_history: dict[int, dict[str, dict[str, list[datetime]]]] = {} - - for record in history: - bug_id = record.number - timestamp = record.change_time - - for change in record.changes: - if change.field_name == "keywords": - if bug_id not in keyword_history: - keyword_history[bug_id] = {"added": {}, "removed": {}} - - keyword_records = keyword_history[bug_id] - - for action in ["added", "removed"]: - keywords = getattr(change, action) - if keywords: - for keyword in keywords.split(", "): - if keyword not in keyword_records[action]: - keyword_records[action][keyword] = [] + SELECT * + FROM `{self.bq_dataset_id}.bugs_history` + WHERE number IN ({formatted_numbers}) + """ + result = self.bq_client.query(query).result() + for 
row in result: + rv[row["number"]].append( + BugHistoryEntry( + row["number"], + row["who"], + row["change_time"], + changes=[ + BugHistoryChange( + change["field_name"], change["added"], change["removed"] + ) + for change in row["changes"] + ], + ) + ) + return rv + + def keyword_history( + self, history: HistoryByBug + ) -> Mapping[BugId, Mapping[str, PropertyHistory]]: + """Get the time each keyword has been added and removed from each bug""" + keyword_history: defaultdict[int, dict[str, PropertyHistory]] = defaultdict( + dict + ) - keyword_records[action][keyword].append(timestamp) + for bug_id, records in history.items(): + for record in records: + for change in record.changes: + if change.field_name == "keywords": + for src, change_type in [ + (change.added, PropertyChange.added), + (change.removed, PropertyChange.removed), + ]: + if src: + for keyword in src.split(", "): + if keyword not in keyword_history[bug_id]: + keyword_history[bug_id][keyword] = ( + PropertyHistory() + ) + keyword_history[bug_id][keyword].add( + change_time=record.change_time, + change=change_type, + ) return keyword_history - def is_removed_earliest( - self, added_times: list[datetime], removed_times: list[datetime] - ) -> bool: - events = [(at, "added") for at in added_times] + [ - (rt, "removed") for rt in removed_times - ] - events.sort() - - if not events: - return False - - return events[0][1] == "removed" - def get_missing_keywords( self, bug_id: int, current_keywords: list[str], - keyword_history: Mapping[int, Mapping[str, Mapping[str, list[datetime]]]], - ) -> list[str]: - missing_keywords = [] + keyword_history: Mapping[BugId, Mapping[str, PropertyHistory]], + ) -> set[str]: + missing_keywords = set() # Check if keyword exists, but is not in "added" history for keyword in current_keywords: - if bug_id not in keyword_history or keyword not in keyword_history[ - bug_id - ].get("added", {}): - if keyword not in missing_keywords: - missing_keywords.append(keyword) + if bug_id not in keyword_history or keyword not in keyword_history[bug_id]: + missing_keywords.add(keyword) # Check for keywords that have "removed" record as the earliest # event in the sorted timeline if bug_id in keyword_history: - for keyword, removed_times in ( - keyword_history[bug_id].get("removed", {}).items() - ): - added_times = keyword_history[bug_id].get("added", {}).get(keyword, []) - - removed_earliest = self.is_removed_earliest(added_times, removed_times) - - if removed_earliest and keyword not in missing_keywords: - missing_keywords.append(keyword) + for keyword, history in keyword_history[bug_id].items(): + if history.missing_initial_add(): + missing_keywords.add(keyword) return missing_keywords - def build_missing_history( - self, bugs_without_history: Iterable[tuple[Bug, list[str]]] - ) -> list[BugHistoryEntry]: - result: list[BugHistoryEntry] = [] - for bug, missing_keywords in bugs_without_history: - record = BugHistoryEntry( - number=bug["id"], - who=bug["creator"], - change_time=bug["creation_time"], - changes=[ - BugHistoryChange( - added=", ".join(missing_keywords), - field_name="keywords", - removed="", - ) - ], - ) - result.append(record) - return result - - def create_synthetic_history( - self, bugs: BugsById, history: list[BugHistoryEntry] - ) -> list[BugHistoryEntry]: - keyword_history = self.create_keyword_map(history) + def create_initial_history_entry( + self, all_bugs: BugsById, history: HistoryByBug + ) -> HistoryByBug: + """Backfill history entries for bug creation. 
- bugs_without_history = [] - - for bug_id, bug in bugs.items(): - current_keywords = bug["keywords"] + If a bug has keywords set, but there isn't a history entry corresponding + to the keyword being added, we assume they were set on bug creation, and + create a history entry to represent that.""" + result: dict[int, list[BugHistoryEntry]] = {} + keyword_history = self.keyword_history(history) + for bug_id, bug in all_bugs.items(): missing_keywords = self.get_missing_keywords( - bug_id, current_keywords, keyword_history + bug_id, bug.keywords, keyword_history ) if missing_keywords: - bugs_without_history.append((bug, missing_keywords)) + logging.debug( + f"Adding initial history for bug {bug_id} with keywords {missing_keywords}" + ) + record = BugHistoryEntry( + number=bug.id, + who=bug.creator, + change_time=bug.creation_time, + changes=[ + BugHistoryChange( + added=", ".join(sorted(missing_keywords)), + field_name="keywords", + removed="", + ) + ], + ) + result[bug.id] = [record] + return result - return self.build_missing_history(bugs_without_history) + def flatten_history(self, history: HistoryByBug) -> Iterable[HistoryChange]: + for records in history.values(): + for record in records: + for change in record.changes: + yield HistoryChange( + record.number, + record.who, + record.change_time, + change.field_name, + change.added, + change.removed, + ) - def fetch_history_for_new_bugs( - self, all_bugs: BugsById, recreate: bool = False - ) -> tuple[list[BugHistoryEntry], set[int], bool]: - only_unsaved_changes: list[BugHistoryEntry] = [] + def unflatten_history(self, diff: Iterable[HistoryChange]) -> HistoryByBug: + changes: dict[tuple[int, str, datetime], BugHistoryEntry] = {} + for item in diff: + key = (item.number, item.who, item.change_time) - all_ids = set(all_bugs.keys()) - if not recreate: - existing_ids = self.get_imported_ids() - new_ids = all_ids - existing_ids - else: - new_ids = all_ids + if key not in changes: + changes[key] = BugHistoryEntry( + number=item.number, + who=item.who, + change_time=item.change_time, + changes=[], + ) + changes[key].changes.append( + BugHistoryChange( + field_name=item.field_name, + added=item.added, + removed=item.removed, + ) + ) - logging.info(f"Fetching new bugs history: {list(new_ids)}") + rv: dict[int, list[BugHistoryEntry]] = {} + for change in changes.values(): + if change.number not in rv: + rv[change.number] = [] + rv[change.number].append(change) + return rv - new_bugs = { - bug_id: bug for bug_id, bug in all_bugs.items() if bug_id in new_ids - } - bugs_history, completed = self.fetch_bugs_history(new_bugs.keys()) - if not completed: - return only_unsaved_changes, new_ids, False +class BigQueryImporter: + """Class to handle all writes to BigQuery""" - history, _ = self.extract_history_fields(bugs_history) + def __init__(self, client: bigquery.Client, bq_dataset_id: str, write: bool): + self.client = client + self.bq_dataset_id = bq_dataset_id + self.write = write - synthetic_history = self.create_synthetic_history(new_bugs, history) + def write_table( + self, + table: str, + schema: list[bigquery.SchemaField], + rows: Sequence[Mapping[str, Any]], + overwrite: bool, + ) -> None: + ensure_table(self.client, self.bq_dataset_id, table, schema, False) + table = f"{self.bq_dataset_id}.{table}" - new_bugs_history = history + synthetic_history + job_config = bigquery.LoadJobConfig( + source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON, + schema=schema, + write_disposition="WRITE_APPEND" if not overwrite else "WRITE_TRUNCATE", + ) - if 
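
Putting `keyword_history` and `get_missing_keywords` together: when a keyword's earliest recorded event is a removal, the backfill concludes it must have been set at creation. A condensed, self-contained check (Mock clients, since neither method touches BigQuery or Bugzilla):

```python
from datetime import datetime, timezone
from unittest.mock import Mock

from webcompat_kb.bugzilla import (
    BugHistoryChange,
    BugHistoryEntry,
    BugHistoryUpdater,
)

updater = BugHistoryUpdater(Mock(), "dataset", Mock())
history = {
    1: [
        BugHistoryEntry(
            number=1,
            who="editor@example.org",
            change_time=datetime(2024, 2, 1, tzinfo=timezone.utc),
            # The earliest event removes the keyword, so it must have been
            # present at creation even though no "added" entry exists.
            changes=[
                BugHistoryChange(
                    field_name="keywords",
                    added="",
                    removed="webcompat:needs-diagnosis",
                )
            ],
        )
    ]
}
keyword_history = updater.keyword_history(history)
missing = updater.get_missing_keywords(1, [], keyword_history)
assert missing == {"webcompat:needs-diagnosis"}
```
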
-        if new_bugs_history:
-            only_unsaved_changes = self.filter_only_unsaved_changes(
-                new_bugs_history, new_ids
+        if self.write:
+            job = self.client.load_table_from_json(
+                cast(Iterable[dict[str, Any]], rows),
+                table,
+                job_config=job_config,
             )
+            job.result()
+            logging.info(f"Wrote {len(rows)} records into {table}")
+        else:
+            logging.info(
+                f"Skipping writes; would have written {len(rows)} records to {table}"
+            )
+            for row in rows:
+                logging.debug(f"  {row}")

-        return only_unsaved_changes, new_ids, True
-
-    def fetch_history_updates(
-        self, all_existing_bugs: BugsById
-    ) -> tuple[Mapping[int, list[bugdantic.bugzilla.History]], bool]:
-        last_import_time = self.get_last_import_datetime()
+    def convert_bug(self, bug: Bug) -> Mapping[str, Any]:
+        """Convert a Bug into a row for the bugzilla_bugs table."""
+        return {
+            "number": bug.id,
+            "title": bug.summary,
+            "status": bug.status,
+            "resolution": bug.resolution,
+            "product": bug.product,
+            "component": bug.component,
+            "creator": bug.creator,
+            "severity": bug.severity,
+            "priority": bug.priority,
+            "creation_time": bug.creation_time.isoformat(),
+            "assigned_to": bug.assigned_to,
+            "keywords": bug.keywords,
+            "url": bug.url,
+            "user_story": bug.parsed_user_story,
+            "user_story_raw": bug.user_story,
+            "resolved_time": bug.resolved.isoformat()
+            if bug.resolved is not None
+            else None,
+            "whiteboard": bug.whiteboard,
+            "webcompat_priority": bug.webcompat_priority,
+            "webcompat_score": bug.webcompat_score,
+            "depends_on": bug.depends_on,
+            "blocks": bug.blocks,
+        }

-        if last_import_time is not None:
-            updated_bug_ids = self.get_bugs_updated_since_last_import(
-                all_existing_bugs, last_import_time
-            )
+    def convert_history_entry(self, entry: BugHistoryEntry) -> Mapping[str, Any]:
+        """Convert a BugHistoryEntry into a row for the bugs_history table."""
+        return {
+            "number": entry.number,
+            "who": entry.who,
+            "change_time": entry.change_time.isoformat(),
+            "changes": [
+                {
+                    "field_name": change.field_name,
+                    "added": change.added,
+                    "removed": change.removed,
+                }
+                for change in entry.changes
+            ],
+        }

-            logging.info(
-                f"Fetching bugs updated after last import: {updated_bug_ids} at {last_import_time.strftime('%Y-%m-%dT%H:%M:%SZ')}"  # noqa
-            )
+    def insert_bugs(self, all_bugs: BugsById) -> None:
+        table = "bugzilla_bugs"
+        schema = [
+            bigquery.SchemaField("number", "INTEGER", mode="REQUIRED"),
+            bigquery.SchemaField("title", "STRING", mode="REQUIRED"),
+            bigquery.SchemaField("status", "STRING", mode="REQUIRED"),
+            bigquery.SchemaField("resolution", "STRING", mode="REQUIRED"),
+            bigquery.SchemaField("product", "STRING", mode="REQUIRED"),
+            bigquery.SchemaField("component", "STRING", mode="REQUIRED"),
+            bigquery.SchemaField("creator", "STRING", mode="REQUIRED"),
+            bigquery.SchemaField("severity", "INTEGER"),
+            bigquery.SchemaField("priority", "INTEGER"),
+            bigquery.SchemaField("creation_time", "TIMESTAMP", mode="REQUIRED"),
+            bigquery.SchemaField("assigned_to", "STRING"),
+            bigquery.SchemaField("keywords", "STRING", mode="REPEATED"),
+            bigquery.SchemaField("url", "STRING"),
+            bigquery.SchemaField("user_story", "JSON"),
+            bigquery.SchemaField("user_story_raw", "STRING"),
+            bigquery.SchemaField("resolved_time", "TIMESTAMP"),
+            bigquery.SchemaField("whiteboard", "STRING"),
+            bigquery.SchemaField("webcompat_priority", "STRING"),
+            bigquery.SchemaField("webcompat_score", "INTEGER"),
+            bigquery.SchemaField("depends_on", "INTEGER", mode="REPEATED"),
+            bigquery.SchemaField("blocks", "INTEGER", mode="REPEATED"),
+        ]
+        rows = [self.convert_bug(bug) for bug in all_bugs.values()]
+        self.write_table(table, schema, rows, overwrite=True)

-        if updated_bug_ids:
-            bugs_full_history, completed = self.fetch_bugs_history(updated_bug_ids)
-            # Filter down to only recent updates, since we always get the full history
-            bugs_history = {}
-            for bug_id, bug_full_history in bugs_full_history.items():
-                bug_history = [
-                    item
-                    for item in bug_full_history
-                    if item.when > last_import_time
-                ]
-                if bug_history:
-                    bugs_history[bug_id] = bug_history
-
-            return bugs_history, completed
-
-        logging.warning("No previous history update found")
-
-        return {}, True
-
-    def fetch_bug_history(
-        self, all_bugs: BugsById, recreate: bool = False
-    ) -> tuple[list[BugHistoryEntry], bool]:
-        filtered_new_history, new_ids, completed = self.fetch_history_for_new_bugs(
-            all_bugs, recreate
-        )
-        if not completed:
-            return [], False
+    def insert_history_changes(
+        self, history_entries: HistoryByBug, recreate: bool
+    ) -> None:
+        """Write history entries to the bugs_history table."""
+        schema = [
+            bigquery.SchemaField("number", "INTEGER", mode="REQUIRED"),
+            bigquery.SchemaField("who", "STRING", mode="REQUIRED"),
+            bigquery.SchemaField("change_time", "TIMESTAMP", mode="REQUIRED"),
+            bigquery.SchemaField(
+                "changes",
+                "RECORD",
+                mode="REPEATED",
+                fields=[
+                    bigquery.SchemaField("field_name", "STRING", mode="REQUIRED"),
+                    bigquery.SchemaField("added", "STRING", mode="REQUIRED"),
+                    bigquery.SchemaField("removed", "STRING", mode="REQUIRED"),
+                ],
+            ),
+        ]

-        existing_bugs = {
-            bug_id: bug for bug_id, bug in all_bugs.items() if bug_id not in new_ids
-        }
+        rows = [
+            self.convert_history_entry(entry)
+            for entries in history_entries.values()
+            for entry in entries
+        ]
+        self.write_table("bugs_history", schema, rows, overwrite=recreate)

-        existing_bugs_history, completed = self.fetch_history_updates(existing_bugs)
-        if not completed:
-            return [], False
+    def insert_bug_list(
+        self, table_name: str, field_name: str, bugs: Iterable[BugId]
+    ) -> None:
+        """Write a single-column table containing the given bug ids."""
+        schema = [bigquery.SchemaField(field_name, "INTEGER", mode="REQUIRED")]
+        rows = [{field_name: bug_id} for bug_id in bugs]
+        self.write_table(table_name, schema, rows, overwrite=True)

-        if filtered_new_history or existing_bugs_history:
-            filtered_existing = self.filter_relevant_history(existing_bugs_history)
-            filtered_records = filtered_existing + filtered_new_history
-            return filtered_records, True
+    def insert_bug_links(
+        self, link_config: BugLinkConfig, links_by_bug: Mapping[BugId, Iterable[BugId]]
+    ) -> None:
+        """Write a table of (from bug, to bug) links."""
+        schema = [
+            bigquery.SchemaField(
+                link_config.from_field_name, "INTEGER", mode="REQUIRED"
+            ),
+            bigquery.SchemaField(link_config.to_field_name, "INTEGER", mode="REQUIRED"),
+        ]
+        rows = [
+            {
+                link_config.from_field_name: from_bug_id,
+                link_config.to_field_name: to_bug_id,
+            }
+            for from_bug_id, to_bug_ids in links_by_bug.items()
+            for to_bug_id in to_bug_ids
+        ]
+        self.write_table(link_config.table_name, schema, rows, overwrite=True)

-        logging.info("No relevant history updates")
-        return [], True
+    def insert_external_links(
+        self,
+        link_config: ExternalLinkConfig,
+        links_by_bug: Mapping[BugId, Iterable[str]],
+    ) -> None:
+        """Write a table linking knowledge base bugs to external resources."""
+        schema = [
+            bigquery.SchemaField("knowledge_base_bug", "INTEGER", mode="REQUIRED"),
+            bigquery.SchemaField(link_config.field_name, "STRING", mode="REQUIRED"),
+        ]
+        rows = [
+            {"knowledge_base_bug": bug_id, link_config.field_name: link_text}
+            for bug_id, links in links_by_bug.items()
+            for link_text in links
+        ]
+        self.write_table(link_config.table_name, schema, rows, overwrite=True)

     def record_import_run(
         self,
         start_time: float,
-        history_fetch_completed: bool,
         count: int,
-        history_count: int,
+        history_count: Optional[int],
         last_change_time: datetime,
     ) -> None:
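+        """Record this import run in the import_runs table."""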
+        if not self.write:
+            return
+
         elapsed_time = time.monotonic() - start_time
         elapsed_time_delta = timedelta(seconds=elapsed_time)
         run_at = last_change_time - elapsed_time_delta
@@ -1270,8 +1144,10 @@ def record_import_run(
             {
                 "run_at": formatted_time,
                 "bugs_imported": count,
-                "bugs_history_updated": history_count,
-                "is_history_fetch_completed": history_fetch_completed,
+                "bugs_history_updated": history_count
+                if history_count is not None
+                else 0,
+                "is_history_fetch_completed": history_count is not None,
             },
         ]
         bugbug_runs_table = f"{self.bq_dataset_id}.import_runs"
@@ -1281,103 +1157,185 @@
         else:
             logging.info("Last import run recorded")

-    def run(self) -> None:
-        start_time = time.monotonic()
-        fetch_all_result = self.fetch_all_bugs()

+def get_kb_entries(all_bugs: BugsById, site_report_blockers: set[BugId]) -> set[BugId]:
+    direct_kb_entries = {bug_id for bug_id, bug in all_bugs.items() if is_kb_entry(bug)}
+    kb_blockers = {
+        dependency
+        for bug_id in direct_kb_entries
+        for dependency in all_bugs[bug_id].depends_on
+    }
+    # We include any bug that's blocking a site report, isn't already blocking a
+    # kb entry, and isn't in the Web Compatibility product
+    platform_site_report_blockers = {
+        bug_id
+        for bug_id in site_report_blockers
+        if bug_id not in kb_blockers and all_bugs[bug_id].product != "Web Compatibility"
+    }
+    # We also include all other platform bugs that aren't already blocking a kb entry
+    # TODO: This is probably too many bugs; platform bugs that don't block any site reports
+    # should likely be excluded
+    platform_kb_entries = {
+        bug_id
+        for bug_id in all_bugs
+        if bug_id not in kb_blockers and is_webcompat_platform_bug(all_bugs[bug_id])
+    }
+    return direct_kb_entries | platform_site_report_blockers | platform_kb_entries
+
+
+def group_bugs(
+    all_bugs: BugsById,
+) -> tuple[set[BugId], set[BugId], set[BugId], set[BugId]]:
+    """Extract groups of bugs according to their types"""
+    site_reports = {bug_id for bug_id, bug in all_bugs.items() if is_site_report(bug)}
+    etp_reports = {bug_id for bug_id, bug in all_bugs.items() if is_etp_report(bug)}
+    site_report_blockers = {
+        dependency
+        for bug_id in site_reports
+        for dependency in all_bugs[bug_id].depends_on
+    }
+    assert site_report_blockers.issubset(all_bugs.keys())
+    kb_bugs = get_kb_entries(all_bugs, site_report_blockers)
+    platform_bugs = {
+        bug_id for bug_id in all_bugs if all_bugs[bug_id].product != "Web Compatibility"
+    }
+    return site_reports, etp_reports, kb_bugs, platform_bugs
+
+
+def write_bugs(
+    path: str,
+    all_bugs: BugsById,
+    site_reports: set[BugId],
+    etp_reports: set[BugId],
+    kb_bugs: set[BugId],
+    platform_bugs: set[BugId],
+    bug_links: Iterable[tuple[BugLinkConfig, Mapping[BugId, set[BugId]]]],
+    external_links: Iterable[tuple[ExternalLinkConfig, Mapping[BugId, set[str]]]],
+) -> None:
+    data: dict[str, Any] = {}
+    data["all_bugs"] = {bug_id: bug.to_json() for bug_id, bug in all_bugs.items()}
+    data["site_report"] = list(site_reports)
+    data["etp_reports"] = list(etp_reports)
+    data["kb_bugs"] = list(kb_bugs)
+    data["platform_bugs"] = list(platform_bugs)
+    for bug_link_config, link_data in bug_links:
+        data[bug_link_config.table_name] = {
+            bug_id: list(values) for bug_id, values in link_data.items()
+        }
+    for external_link_config, external_link_data in external_links:
+        data[external_link_config.table_name] = {
+            bug_id: list(values) for bug_id, values in external_link_data.items()
+        }
+
+    with open(path, "w") as f:
+        json.dump(data, f)
+

-        if fetch_all_result is None:
-            raise BugFetchError(
+def load_bugs(
+    bz_client: bugdantic.Bugzilla, load_bug_data_path: Optional[str]
+) -> BugsById:
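+    """Load bugs from the JSON file at load_bug_data_path, if given,
+    otherwise fetch all bugs from Bugzilla."""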
+    if load_bug_data_path is not None:
+        try:
+            logging.info(f"Reading bug data from {load_bug_data_path}")
+            with open(load_bug_data_path) as f:
+                data = json.load(f)
+            return {
+                int(bug_id): Bug.from_json(bug_data)
+                for bug_id, bug_data in data["all_bugs"].items()
+            }
+        except Exception as e:
+            raise BugLoadError(f"Reading bugs from {load_bug_data_path} failed") from e
+    else:
+        try:
+            return fetch_all_bugs(bz_client)
+        except Exception as e:
+            raise BugLoadError(
                 "Fetching bugs from Bugzilla was not completed due to an error, aborting."
-            )
+            ) from e
+
+
+def run(
+    client: bigquery.Client,
+    bq_dataset_id: str,
+    bz_client: bugdantic.Bugzilla,
+    write: bool,
+    include_history: bool,
+    recreate_history: bool,
+    write_bug_data_path: Optional[str],
+    load_bug_data_path: Optional[str],
+) -> None:
+    """Import bugs, bug history, and bug links from Bugzilla into BigQuery."""
+    start_time = time.monotonic()
+
+    all_bugs = load_bugs(bz_client, load_bug_data_path)
+
+    history_changes = None
+    if include_history:
+        history_updater = BugHistoryUpdater(client, bq_dataset_id, bz_client)
+        try:
+            history_changes = history_updater.run(all_bugs, recreate_history)
+        except Exception as e:
+            logging.error(f"Exception updating history: {e}")
+            raise
+    else:
+        logging.info("Not updating bug history")
+
+    site_reports, etp_reports, kb_bugs, platform_bugs = group_bugs(all_bugs)
+
+    # Links between different kinds of bugs
+    bug_links = [
+        (
+            BugLinkConfig("breakage_reports", "knowledge_base_bug", "breakage_bug"),
+            get_kb_bug_site_report(all_bugs, kb_bugs, site_reports),
+        ),
+        (
+            BugLinkConfig("core_bugs", "knowledge_base_bug", "core_bug"),
+            get_kb_bug_core_bugs(all_bugs, kb_bugs, platform_bugs),
+        ),
         (
+            BugLinkConfig("etp_breakage_reports", "breakage_bug", "etp_meta_bug"),
+            get_etp_breakage_reports(all_bugs, etp_reports),
+        ),
+    ]
+
+    # Links between bugs and external data sources
+    external_links = [
+        (config, config.get_links(all_bugs, kb_bugs))
+        for config in EXTERNAL_LINK_CONFIGS.values()
+    ]
+
+    if write_bug_data_path is not None:
+        write_bugs(
+            write_bug_data_path,
+            all_bugs,
             site_reports,
+            etp_reports,
             kb_bugs,
             platform_bugs,
-            etp_reports,
-            etp_dependencies,
-        ) = fetch_all_result
-
-        # Add platform bugs that should be imported as knowledge base bugs (with some
-        # modifications to their dependencies)
-        kb_bugs.update(
-            self.kb_bugs_from_platform_bugs(
-                platform_bugs, set(kb_bugs.keys()), set(site_reports.keys())
-            )
+            bug_links,
+            external_links,
         )

-        # Process KB bugs fields and get their dependant core/breakage bugs ids.
-        kb_data, kb_dep_ids = self.process_relations(kb_bugs, RELATION_CONFIG)
-        self.add_kb_entry_breakage(kb_data, kb_dep_ids, site_reports)
+    last_change_time_max = max(bug.last_change_time for bug in all_bugs.values())

-        fetch_missing_result = self.fetch_missing_deps(all_bugs, kb_dep_ids)
-        if fetch_missing_result is None:
-            raise BugFetchError(
-                "Fetching missing dependencies from Bugzilla was not completed due to an error, aborting."
-            )
+    # Finally do the actual import
+    importer = BigQueryImporter(client, bq_dataset_id, write)
+    importer.insert_bugs(all_bugs)
+    if history_changes is not None:
+        importer.insert_history_changes(history_changes, recreate=recreate_history)

-        missing_bugs, core_missing = fetch_missing_result
-
-        platform_bugs.update(core_missing)
-        all_bugs.update(missing_bugs)
-
-        # Process core bugs and update KB data with missing links from core bugs. 
- if platform_bugs: - core_data, _ = self.process_relations( - platform_bugs, PLATFORM_RELATION_CONFIG - ) - kb_data = self.add_links(kb_data, core_data) + importer.insert_bug_list("kb_bugs", "number", kb_bugs) - # Build relations for BQ tables. - rels = self.build_relations(kb_data, RELATION_CONFIG) - - kb_ids = list(kb_data.keys()) - - if self.include_history: - history_changes, history_fetch_completed = self.fetch_bug_history( - all_bugs, self.recreate_history - ) - else: - logging.info("Not updating bug history") - history_changes = [] - history_fetch_completed = False - - etp_rels: Mapping[str, list[Mapping[str, Any]]] = {} - if etp_reports: - etp_reports_unified = self.unify_etp_dependencies( - etp_reports, etp_dependencies - ) - etp_data, _ = self.process_relations( - etp_reports_unified, ETP_RELATION_CONFIG - ) + for bug_link_config, data in bug_links: + importer.insert_bug_links(bug_link_config, data) - etp_rels = self.build_relations(etp_data, ETP_RELATION_CONFIG) + for external_link_config, links_by_bug in external_links: + importer.insert_external_links(external_link_config, links_by_bug) - if self.write: - if history_fetch_completed: - self.update_history(history_changes, self.recreate_history) - elif self.include_history: - logging.warning("Failed to fetch bug history, not updating") - self.update_bugs(all_bugs) - self.update_kb_ids(kb_ids) - self.update_relations(rels, RELATION_CONFIG) - self.update_relations(etp_rels, ETP_RELATION_CONFIG) - - last_change_time_max = max( - all_bugs.values(), key=lambda x: x["last_change_time"] - )["last_change_time"] - - self.record_import_run( - start_time, - history_fetch_completed, - len(all_bugs), - len(history_changes), - last_change_time_max, - ) - else: - logging.info("Skipping writes") + importer.record_import_run( + start_time, + len(all_bugs), + len(history_changes) if history_changes is not None else None, + last_change_time_max, + ) class BugzillaJob(EtlJob): @@ -1405,15 +1363,32 @@ def add_arguments(cls, parser: argparse.ArgumentParser) -> None: action="store_true", help="Re-read bug history from scratch", ) + group.add_argument( + "--bugzilla-write-bug-data", + action="store", + help="Path to write bug data as a JSON file", + ) + group.add_argument( + "--bugzilla-load-bug-data", + action="store", + help="Path to JSON file to load bug data from", + ) def main(self, client: bigquery.Client, args: argparse.Namespace) -> None: - bz_bq = BugzillaToBigQuery( + bz_config = bugdantic.BugzillaConfig( + "https://bugzilla.mozilla.org", + args.bugzilla_api_key, + allow_writes=args.write, + ) + bz_client = bugdantic.Bugzilla(bz_config) + + run( client, args.bq_kb_dataset, - args.bugzilla_api_key, + bz_client, args.write, args.bugzilla_include_history, args.bugzilla_recreate_history, + args.bugzilla_write_bug_data, + args.bugzilla_load_bug_data, ) - - bz_bq.run() diff --git a/jobs/webcompat-kb/webcompat_kb/metric_changes.py b/jobs/webcompat-kb/webcompat_kb/metric_changes.py index 99598d2a..2a50489a 100644 --- a/jobs/webcompat-kb/webcompat_kb/metric_changes.py +++ b/jobs/webcompat-kb/webcompat_kb/metric_changes.py @@ -9,8 +9,8 @@ from google.cloud import bigquery from .base import EtlJob -from .bugzilla import parse_string_to_json from .bqhelpers import ensure_table +from .bugzilla import parse_user_story FIXED_STATES = {"RESOLVED", "VERIFIED"} @@ -329,7 +329,7 @@ def compute_historic_scores( "index": i, "keywords": state.keywords, "url": state.url, - "user_story": parse_string_to_json(state.user_story), + "user_story": 
parse_user_story(state.user_story), } )