diff --git a/perceval/backends/mozilla/kitsune.py b/perceval/backends/mozilla/kitsune.py index 8cd8500..8221333 100644 --- a/perceval/backends/mozilla/kitsune.py +++ b/perceval/backends/mozilla/kitsune.py @@ -278,6 +278,7 @@ def get_question_answers(self, question_id): """Retrieve all answers for a question from older to newer (updated)""" page = KitsuneClient.FIRST_PAGE + failures = 0 while True: api_answers_url = urijoin(self.base_url, '/answer') + '/' @@ -286,9 +287,21 @@ def get_question_answers(self, question_id): "question": question_id, "ordering": "updated" } - - answers_raw = self.fetch(api_answers_url, params) - yield answers_raw + try: + answers_raw = self.fetch(api_answers_url, params) + yield answers_raw + failures = 0 + except requests.exceptions.HTTPError as e: + failures += 1 + if e.response.status_code == 500: + if failures > self.max_retries: + logger.error(f"Problem getting Kitsune answers for question id {question_id}. " + f"Skipping answers for this question.") + return # Stop generator + time.sleep(2 ** failures) + continue + else: + raise answers = json.loads(answers_raw) if not answers['next']: @@ -323,6 +336,9 @@ def fetch(self, url, params): self.sleep_for_rate_limit() else: raise ex + finally: + # Avoids to do requests too fast + time.sleep(0.5) raise HttpClientError(cause="Max retries exceeded") diff --git a/releases/unreleased/handle-kitsune-server-errors.yml b/releases/unreleased/handle-kitsune-server-errors.yml new file mode 100644 index 0000000..4c9a610 --- /dev/null +++ b/releases/unreleased/handle-kitsune-server-errors.yml @@ -0,0 +1,9 @@ +--- +title: Handle Kitsune server errors +category: fixed +author: Jose Javier Merchante +issue: null +notes: > + This change adds retry logic for HTTP 500 errors when + fetching answers. Ignores the answers after several + retries. diff --git a/tests/test_kitsune.py b/tests/test_kitsune.py index c553d26..f2719b5 100644 --- a/tests/test_kitsune.py +++ b/tests/test_kitsune.py @@ -25,8 +25,10 @@ import json import time import unittest +import unittest.mock import httpretty +import requests from grimoirelab_toolkit.datetime import str_to_datetime @@ -42,6 +44,7 @@ KITSUNE_API = KITSUNE_SERVER_URL + '/api/2/' KITSUNE_API_QUESTION = KITSUNE_SERVER_URL + '/api/2/question/' KITSUNE_API_ANSWER = KITSUNE_SERVER_URL + '/api/2/answer/' +KITSUNE_API_ANSWERS_ERROR = KITSUNE_SERVER_URL + '/api/2/answer/?question=12345&page=1&ordering=updated' KITSUNE_SERVER_FAIL_DATE = '3000-01-01' KITSUNE_SERVER_RATELIMIT_DATE = '1980-01-01' @@ -132,6 +135,11 @@ def request_callback(method, uri, headers): httpretty.Response(body=request_callback) ]) + httpretty.register_uri(httpretty.GET, + KITSUNE_API_ANSWERS_ERROR, + match_querystring=True, + status=500) + class TestKitsuneBackend(unittest.TestCase): """Kitsune backend tests""" @@ -459,6 +467,25 @@ def test_rate_limit_error(self): } self.assertDictEqual(req.querystring, expected) + def test_get_question_answers_retries_and_stops_on_500(self): + """Test get question answers stops after max_retries on repeated HTTP 500""" + + client = KitsuneClient(KITSUNE_SERVER_URL, max_retries=2) + question_id = 12345 + + # Simulate HTTP 500 error + error = requests.exceptions.HTTPError() + error.response = unittest.mock.Mock(status_code=500) + + with unittest.mock.patch.object(client, 'fetch', side_effect=error) as mock_fetch, \ + unittest.mock.patch('time.sleep') as mock_sleep, \ + self.assertLogs(level='ERROR') as cm: + result = list(client.get_question_answers(question_id)) + self.assertEqual(result, []) + self.assertIn(f"Problem getting Kitsune answers for question id {question_id}", cm.output[-1]) + self.assertEqual(mock_fetch.call_count, 3) # initial + 2 retries + self.assertEqual(mock_sleep.call_count, 2) # sleep called for each retry + if __name__ == "__main__": unittest.main(warnings='ignore')