Skip to content

Commit 8beb05a

Browse files
Add type hints to Confluence plugin (#442)
- Add comprehensive type hints throughout the Confluence plugin - Refactor class hierarchy: * Rename ConfluenceStats to ConfluenceStatsGroup (main stats group) * Introduce ConfluenceStats as abstract base class for stat classes * Update PageCreated, PageModified, CommentAdded to inherit from ConfluenceStats - Improve error handling: * Add session renewal on 401 (unauthorized) responses * Add retry logic for connection aborted errors * Add renew_session() method for session management - Add markdown format support for ConfluenceComment output - Fix missing docstring for CommentAdded class - Update test file to use ConfluenceStatsGroup and add type hints - Fix typo in test function name (invaliad -> invalid) - Change TIMEOUT constant to float (60.0) for type consistency Signed-off-by: Sandro Bonazzola <[email protected]>
1 parent c5e7a3e commit 8beb05a

File tree

2 files changed

+135
-62
lines changed

2 files changed

+135
-62
lines changed

did/plugins/confluence.py

Lines changed: 111 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -70,15 +70,19 @@
7070
import re
7171
import time
7272
import urllib.parse
73+
from argparse import Namespace
7374
from datetime import datetime
75+
from http import HTTPStatus
76+
from typing import Any, Optional
7477

7578
import dateutil.parser
7679
import requests
7780
import urllib3
81+
import urllib3.exceptions
7882
from requests_gssapi import DISABLED # type: ignore[import-untyped]
7983
from requests_gssapi import HTTPSPNEGOAuth
8084

81-
from did.base import Config, ReportError, get_token
85+
from did.base import Config, ReportError, User, get_token
8286
from did.stats import Stats, StatsGroup
8387
from did.utils import listed, log, pretty, strtobool
8488

@@ -95,7 +99,7 @@
9599
SSL_VERIFY = True
96100

97101
# Default number of seconds waiting on Sentry before giving up
98-
TIMEOUT = 60
102+
TIMEOUT = 60.0
99103

100104
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
101105
# Issue Investigator
@@ -107,15 +111,27 @@ class Confluence():
107111
# pylint: disable=too-few-public-methods
108112

109113
@staticmethod
110-
def fetch_ratelimited_url(session, current_url, timeout):
114+
def fetch_ratelimited_url(
115+
parent: "ConfluenceStatsGroup",
116+
current_url: str,
117+
timeout: float) -> dict[str, Any]:
118+
"""
119+
Fetch a URL with rate limiting.
120+
121+
:param parent: The parent stats group.
122+
:param current_url: The URL to fetch.
123+
:param timeout: The timeout for the request.
124+
:return: The json of the response from the URL.
125+
"""
111126
log.debug("Fetching %s", current_url)
127+
session = parent.session
112128
while True:
113129
try:
114130
response = session.get(
115131
current_url,
116132
timeout=timeout)
117133
# Handle the exceeded rate limit
118-
if response.status_code == 429:
134+
if response.status_code == HTTPStatus.TOO_MANY_REQUESTS:
119135
if response.headers.get("X-RateLimit-Remaining") == "0":
120136
# Wait at least a second.
121137
retry_after = max(int(response.headers["retry-after"]), 1)
@@ -124,6 +140,8 @@ def fetch_ratelimited_url(session, current_url, timeout):
124140
listed(retry_after, 'second'))
125141
time.sleep(retry_after)
126142
continue
143+
if response.status_code == HTTPStatus.UNAUTHORIZED:
144+
session = parent.renew_session()
127145

128146
response.raise_for_status()
129147
except requests.Timeout:
@@ -135,13 +153,17 @@ def fetch_ratelimited_url(session, current_url, timeout):
135153
urllib3.exceptions.NewConnectionError,
136154
requests.exceptions.HTTPError
137155
) as error:
156+
if 'Connection aborted' in str(error):
157+
log.warning("Connection aborted. Sleeping for 10 seconds.")
158+
time.sleep(10)
159+
continue
138160
log.error("Error fetching '%s': %s", current_url, error)
139161
raise ReportError(
140162
f"Failed to connect to Confluence at {current_url}."
141163
) from error
142164
break
143165
try:
144-
data = response.json()
166+
data: dict[str, Any] = response.json()
145167
except requests.exceptions.JSONDecodeError as error:
146168
log.debug(error)
147169
raise ReportError(
@@ -160,8 +182,12 @@ def fetch_ratelimited_url(session, current_url, timeout):
160182
return data
161183

162184
@staticmethod
163-
def get_page_versions(page_id, stats):
185+
def get_page_versions(
186+
page_id: str,
187+
stats: "ConfluenceStats") -> list[dict[str, Any]]:
164188
"Fetch all versions of the given page"
189+
if stats.parent is None:
190+
raise RuntimeError("f{stats} not initialized")
165191
version_url = f"{stats.parent.url}/rest/experimental/content/{page_id}/version"
166192
versions = []
167193
start = 0
@@ -170,7 +196,7 @@ def get_page_versions(page_id, stats):
170196
encoded_query = urllib.parse.urlencode({"start": start, "limit": limit})
171197

172198
data = Confluence.fetch_ratelimited_url(
173-
stats.parent.session,
199+
stats.parent,
174200
f"{version_url}?{encoded_query}",
175201
stats.parent.timeout)
176202
versions.extend(data.get("results", []))
@@ -180,7 +206,11 @@ def get_page_versions(page_id, stats):
180206
return versions
181207

182208
@staticmethod
183-
def search(query, stats, expand=None, timeout=TIMEOUT):
209+
def search(
210+
query: str,
211+
stats: "ConfluenceStats",
212+
expand: Optional[str] = None,
213+
timeout: float = TIMEOUT) -> list[dict[str, Any]]:
184214
""" Perform page/comment search for given stats instance """
185215
log.debug("Search query: %s", query)
186216
content = []
@@ -196,7 +226,7 @@ def search(query, stats, expand=None, timeout=TIMEOUT):
196226
)
197227
current_url = f"{stats.parent.url}/rest/api/content/search?{encoded_query}"
198228
data = Confluence.fetch_ratelimited_url(
199-
stats.parent.session, current_url, timeout
229+
stats.parent, current_url, timeout
200230
)
201231
log.debug(
202232
"Batch %s result: %s fetched",
@@ -214,13 +244,13 @@ def search(query, stats, expand=None, timeout=TIMEOUT):
214244
class ConfluencePage(Confluence):
215245
""" Confluence page results """
216246

217-
def __init__(self, page, url, myformat):
247+
def __init__(self, page: dict[str, Any], url: str, myformat: str) -> None:
218248
""" Initialize the page """
219249
self.title = page['title']
220250
self.url = f"{url}{page['_links']['webui']}"
221251
self.format = myformat
222252

223-
def __str__(self):
253+
def __str__(self) -> str:
224254
""" Page title for displaying """
225255
if self.format == "markdown":
226256
return f"[{self.title}]({self.url})"
@@ -230,7 +260,7 @@ def __str__(self):
230260
class ConfluenceComment(Confluence):
231261
""" Confluence comment results """
232262

233-
def __init__(self, comment, url, myformat):
263+
def __init__(self, comment: dict[str, Any], url: str, myformat: str) -> None:
234264
""" Initialize issue """
235265
# Remove the 'Re:' prefix
236266
self.title = re.sub('^Re: ', '', comment['title'])
@@ -241,9 +271,10 @@ def __init__(self, comment, url, myformat):
241271
self.url = url
242272
self.format = myformat
243273

244-
def __str__(self):
274+
def __str__(self) -> str:
245275
""" Confluence title & comment snippet for displaying """
246-
# TODO: implement markdown output here
276+
if self.format == "markdown":
277+
return f"[{self.title}]({self.url}): {self.body}"
247278
return f"{self.title}: {self.body}"
248279

249280

@@ -252,10 +283,30 @@ def __str__(self):
252283
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
253284

254285

255-
class PageCreated(Stats):
286+
class ConfluenceStats(Stats):
287+
"""
288+
Abstract class for collecting Confluence related Stats
289+
"""
290+
291+
def __init__(self, /,
292+
option: str,
293+
name: str,
294+
parent: "ConfluenceStatsGroup",
295+
user: Optional[User], *,
296+
options: Optional[Namespace]) -> None:
297+
self.parent: ConfluenceStatsGroup
298+
self.options: Namespace
299+
self.user: User
300+
super().__init__(option, name, parent, user, options=options)
301+
302+
def fetch(self) -> None:
303+
raise NotImplementedError()
304+
305+
306+
class PageCreated(ConfluenceStats):
256307
""" Created pages """
257308

258-
def fetch(self):
309+
def fetch(self) -> None:
259310
log.info("Searching for pages created by %s", self.user)
260311
query = (
261312
f"type=page AND creator = '{self.user.login}' "
@@ -270,10 +321,10 @@ def fetch(self):
270321
]
271322

272323

273-
class PageModified(Stats):
324+
class PageModified(ConfluenceStats):
274325
""" Modified pages """
275326

276-
def fetch(self):
327+
def fetch(self) -> None:
277328
log.info("Searching for pages modified by %s", self.user)
278329
query = (
279330
f"type=page AND contributor = '{self.user.login}' "
@@ -304,8 +355,10 @@ def fetch(self):
304355
]
305356

306357

307-
class CommentAdded(Stats):
308-
def fetch(self):
358+
class CommentAdded(ConfluenceStats):
359+
""" Commented pages """
360+
361+
def fetch(self) -> None:
309362
log.info("Searching for comments added by %s", self.user)
310363
query = (
311364
f"type=comment AND creator = '{self.user.login}' "
@@ -323,13 +376,14 @@ def fetch(self):
323376
# Stats Group
324377
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
325378

326-
class ConfluenceStats(StatsGroup):
379+
380+
class ConfluenceStatsGroup(StatsGroup):
327381
""" Confluence stats """
328382

329383
# Default order
330384
order = 600
331385

332-
def _basic_auth(self, option, config):
386+
def _basic_auth(self, option: str, config: dict[str, str]) -> None:
333387
if "auth_username" not in config:
334388
raise ReportError(f"`auth_username` not set in the [{option}] section")
335389
self.auth_username = config["auth_username"]
@@ -344,16 +398,16 @@ def _basic_auth(self, option, config):
344398
"`auth_password` or `auth_password_file` must be set "
345399
f"in the [{option}] section.")
346400

347-
def _token_auth(self, option, config):
401+
def _token_auth(self, option: str, config: dict[str, str]) -> None:
348402
self.token = get_token(config)
349403
if self.token is None:
350404
raise ReportError(
351405
"The `token` or `token_file` key must be set "
352406
f"in the [{option}] section.")
353407
if "token_expiration" in config or "token_name" in config:
354408
try:
355-
self.token_expiration = int(config["token_expiration"])
356-
self.token_name = config["token_name"]
409+
self.token_expiration: Optional[int] = int(config["token_expiration"])
410+
self.token_name: Optional[str] = config["token_name"]
357411
except KeyError as key_err:
358412
raise ReportError(
359413
"The ``token_name`` and ``token_expiration`` must be set at"
@@ -363,26 +417,30 @@ def _token_auth(self, option, config):
363417
"The ``token_expiration`` must contain number, "
364418
f"used in [{option}] section.") from val_err
365419
else:
366-
self.token_expiration = self.token_name = None
420+
self.token_expiration = None
421+
self.token_name = None
367422

368-
def _set_ssl_verification(self, config):
423+
def _set_ssl_verification(self, config: dict[str, str]) -> None:
369424
# SSL verification
370425
if "ssl_verify" in config:
371426
try:
372-
self.ssl_verify = strtobool(
373-
config["ssl_verify"])
427+
self.ssl_verify = bool(strtobool(config["ssl_verify"]))
374428
except Exception as error:
375429
raise ReportError(
376430
f"Error when parsing 'ssl_verify': {error}") from error
377431
else:
378432
self.ssl_verify = SSL_VERIFY
379433

380-
def __init__(self, option, name=None, parent=None, user=None):
434+
def __init__(self,
435+
option: str,
436+
name: Optional[str] = None,
437+
parent: Optional[StatsGroup] = None,
438+
user: Optional[User] = None) -> None:
381439
StatsGroup.__init__(self, option, name, parent, user)
382-
self._session = None
440+
self._session: Optional[requests.Session] = None
383441
# Make sure there is an url provided
384442
config = dict(Config().section(option))
385-
self.timeout = config.get("timeout", TIMEOUT)
443+
self.timeout: float = float(config.get("timeout", TIMEOUT))
386444
if "url" not in config:
387445
raise ReportError(f"No Confluence url set in the [{option}] section")
388446
self.url = config["url"].rstrip("/")
@@ -413,6 +471,7 @@ def __init__(self, option, name=None, parent=None, user=None):
413471
f" basic authentication (section [{option}])")
414472
# Token
415473
self.token_expiration = None
474+
self.token_name = None
416475
if self.auth_type == "token":
417476
self._token_auth(option, config)
418477
self._set_ssl_verification(config)
@@ -425,22 +484,28 @@ def __init__(self, option, name=None, parent=None, user=None):
425484
PageCreated(
426485
option=f"{option}-pages-created",
427486
parent=self,
487+
user=self.user,
488+
options=self.options,
428489
name=f"Pages created in {option}"),
429490
PageModified(
430491
option=f"{option}-pages-updated",
431492
parent=self,
493+
user=self.user,
494+
options=self.options,
432495
name=f"Pages updated in {option}"),
433496
CommentAdded(
434497
option=f"{option}-comments",
435498
parent=self,
499+
user=self.user,
500+
options=self.options,
436501
name=f"Comments added in {option}"),
437502
]
438503

439-
def _basic_auth_session(self):
504+
def _basic_auth_session(self) -> requests.Response:
440505
log.debug("Connecting to %s for basic auth", self.auth_url)
441506
basic_auth = (self.auth_username, self.auth_password)
442507
try:
443-
response = self._session.get(
508+
response = self.session.get(
444509
self.auth_url, auth=basic_auth, verify=self.ssl_verify,
445510
timeout=self.timeout)
446511
except (requests.exceptions.ConnectionError,
@@ -452,12 +517,12 @@ def _basic_auth_session(self):
452517
) from error
453518
return response
454519

455-
def _token_auth_session(self):
520+
def _token_auth_session(self) -> requests.Response:
456521
log.debug("Connecting to %s/rest/api/content for token auth", self.url)
457522
self.session.headers["Authorization"] = f"Bearer {self.token}"
458523
while True:
459524
try:
460-
response = self._session.get(
525+
response = self.session.get(
461526
f"{self.url}/rest/api/content",
462527
verify=self.ssl_verify,
463528
timeout=self.timeout)
@@ -475,11 +540,13 @@ def _token_auth_session(self):
475540
break
476541
return response
477542

478-
def _gss_api_auth_session(self):
543+
def _gss_api_auth_session(self) -> requests.Response:
544+
if self._session is None:
545+
raise RuntimeError("Session has not been initialized")
479546
log.debug("Connecting to %s for gssapi auth", self.auth_url)
480547
gssapi_auth = HTTPSPNEGOAuth(mutual_authentication=DISABLED)
481548
try:
482-
response = self._session.get(
549+
response: requests.Response = self._session.get(
483550
self.auth_url, auth=gssapi_auth, verify=self.ssl_verify,
484551
timeout=self.timeout)
485552
except (requests.exceptions.ConnectionError,
@@ -491,8 +558,12 @@ def _gss_api_auth_session(self):
491558
) from error
492559
return response
493560

561+
def renew_session(self) -> requests.Session:
562+
self._session = None
563+
return self.session
564+
494565
@property
495-
def session(self):
566+
def session(self) -> requests.Session:
496567
""" Initialize the session """
497568
# pylint: disable=too-many-branches
498569
if self._session is not None:
@@ -508,7 +579,7 @@ def session(self):
508579
response = self._token_auth_session()
509580
else:
510581
response = self._gss_api_auth_session()
511-
if response.status_code == 429:
582+
if response.status_code == HTTPStatus.TOO_MANY_REQUESTS:
512583
retry_after = 1
513584
if response.headers.get("X-RateLimit-Remaining") == "0":
514585
retry_after = max(int(response.headers["retry-after"]), 1)

0 commit comments

Comments
 (0)