7070import re
7171import time
7272import urllib .parse
73+ from argparse import Namespace
7374from datetime import datetime
75+ from http import HTTPStatus
76+ from typing import Any , Optional
7477
7578import dateutil .parser
7679import requests
7780import urllib3
81+ import urllib3 .exceptions
7882from requests_gssapi import DISABLED # type: ignore[import-untyped]
7983from requests_gssapi import HTTPSPNEGOAuth
8084
81- from did .base import Config , ReportError , get_token
85+ from did .base import Config , ReportError , User , get_token
8286from did .stats import Stats , StatsGroup
8387from did .utils import listed , log , pretty , strtobool
8488
9599SSL_VERIFY = True
96100
97101# Default number of seconds waiting on Sentry before giving up
98- TIMEOUT = 60
102+ TIMEOUT = 60.0
99103
100104# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
101105# Issue Investigator
@@ -107,15 +111,27 @@ class Confluence():
107111 # pylint: disable=too-few-public-methods
108112
109113 @staticmethod
110- def fetch_ratelimited_url (session , current_url , timeout ):
114+ def fetch_ratelimited_url (
115+ parent : "ConfluenceStatsGroup" ,
116+ current_url : str ,
117+ timeout : float ) -> dict [str , Any ]:
118+ """
119+ Fetch a URL with rate limiting.
120+
121+ :param parent: The parent stats group.
122+ :param current_url: The URL to fetch.
123+ :param timeout: The timeout for the request.
124+ :return: The json of the response from the URL.
125+ """
111126 log .debug ("Fetching %s" , current_url )
127+ session = parent .session
112128 while True :
113129 try :
114130 response = session .get (
115131 current_url ,
116132 timeout = timeout )
117133 # Handle the exceeded rate limit
118- if response .status_code == 429 :
134+ if response .status_code == HTTPStatus . TOO_MANY_REQUESTS :
119135 if response .headers .get ("X-RateLimit-Remaining" ) == "0" :
120136 # Wait at least a second.
121137 retry_after = max (int (response .headers ["retry-after" ]), 1 )
@@ -124,6 +140,8 @@ def fetch_ratelimited_url(session, current_url, timeout):
124140 listed (retry_after , 'second' ))
125141 time .sleep (retry_after )
126142 continue
143+ if response .status_code == HTTPStatus .UNAUTHORIZED :
144+ session = parent .renew_session ()
127145
128146 response .raise_for_status ()
129147 except requests .Timeout :
@@ -135,13 +153,17 @@ def fetch_ratelimited_url(session, current_url, timeout):
135153 urllib3 .exceptions .NewConnectionError ,
136154 requests .exceptions .HTTPError
137155 ) as error :
156+ if 'Connection aborted' in str (error ):
157+ log .warning ("Connection aborted. Sleeping for 10 seconds." )
158+ time .sleep (10 )
159+ continue
138160 log .error ("Error fetching '%s': %s" , current_url , error )
139161 raise ReportError (
140162 f"Failed to connect to Confluence at { current_url } ."
141163 ) from error
142164 break
143165 try :
144- data = response .json ()
166+ data : dict [ str , Any ] = response .json ()
145167 except requests .exceptions .JSONDecodeError as error :
146168 log .debug (error )
147169 raise ReportError (
@@ -160,8 +182,12 @@ def fetch_ratelimited_url(session, current_url, timeout):
160182 return data
161183
162184 @staticmethod
163- def get_page_versions (page_id , stats ):
185+ def get_page_versions (
186+ page_id : str ,
187+ stats : "ConfluenceStats" ) -> list [dict [str , Any ]]:
164188 "Fetch all versions of the given page"
189+ if stats .parent is None :
190+ raise RuntimeError ("f{stats} not initialized" )
165191 version_url = f"{ stats .parent .url } /rest/experimental/content/{ page_id } /version"
166192 versions = []
167193 start = 0
@@ -170,7 +196,7 @@ def get_page_versions(page_id, stats):
170196 encoded_query = urllib .parse .urlencode ({"start" : start , "limit" : limit })
171197
172198 data = Confluence .fetch_ratelimited_url (
173- stats .parent . session ,
199+ stats .parent ,
174200 f"{ version_url } ?{ encoded_query } " ,
175201 stats .parent .timeout )
176202 versions .extend (data .get ("results" , []))
@@ -180,7 +206,11 @@ def get_page_versions(page_id, stats):
180206 return versions
181207
182208 @staticmethod
183- def search (query , stats , expand = None , timeout = TIMEOUT ):
209+ def search (
210+ query : str ,
211+ stats : "ConfluenceStats" ,
212+ expand : Optional [str ] = None ,
213+ timeout : float = TIMEOUT ) -> list [dict [str , Any ]]:
184214 """ Perform page/comment search for given stats instance """
185215 log .debug ("Search query: %s" , query )
186216 content = []
@@ -196,7 +226,7 @@ def search(query, stats, expand=None, timeout=TIMEOUT):
196226 )
197227 current_url = f"{ stats .parent .url } /rest/api/content/search?{ encoded_query } "
198228 data = Confluence .fetch_ratelimited_url (
199- stats .parent . session , current_url , timeout
229+ stats .parent , current_url , timeout
200230 )
201231 log .debug (
202232 "Batch %s result: %s fetched" ,
@@ -214,13 +244,13 @@ def search(query, stats, expand=None, timeout=TIMEOUT):
214244class ConfluencePage (Confluence ):
215245 """ Confluence page results """
216246
217- def __init__ (self , page , url , myformat ) :
247+ def __init__ (self , page : dict [ str , Any ], url : str , myformat : str ) -> None :
218248 """ Initialize the page """
219249 self .title = page ['title' ]
220250 self .url = f"{ url } { page ['_links' ]['webui' ]} "
221251 self .format = myformat
222252
223- def __str__ (self ):
253+ def __str__ (self ) -> str :
224254 """ Page title for displaying """
225255 if self .format == "markdown" :
226256 return f"[{ self .title } ]({ self .url } )"
@@ -230,7 +260,7 @@ def __str__(self):
230260class ConfluenceComment (Confluence ):
231261 """ Confluence comment results """
232262
233- def __init__ (self , comment , url , myformat ) :
263+ def __init__ (self , comment : dict [ str , Any ], url : str , myformat : str ) -> None :
234264 """ Initialize issue """
235265 # Remove the 'Re:' prefix
236266 self .title = re .sub ('^Re: ' , '' , comment ['title' ])
@@ -241,9 +271,10 @@ def __init__(self, comment, url, myformat):
241271 self .url = url
242272 self .format = myformat
243273
244- def __str__ (self ):
274+ def __str__ (self ) -> str :
245275 """ Confluence title & comment snippet for displaying """
246- # TODO: implement markdown output here
276+ if self .format == "markdown" :
277+ return f"[{ self .title } ]({ self .url } ): { self .body } "
247278 return f"{ self .title } : { self .body } "
248279
249280
@@ -252,10 +283,30 @@ def __str__(self):
252283# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
253284
254285
255- class PageCreated (Stats ):
286+ class ConfluenceStats (Stats ):
287+ """
288+ Abstract class for collecting Confluence related Stats
289+ """
290+
291+ def __init__ (self , / ,
292+ option : str ,
293+ name : str ,
294+ parent : "ConfluenceStatsGroup" ,
295+ user : Optional [User ], * ,
296+ options : Optional [Namespace ]) -> None :
297+ self .parent : ConfluenceStatsGroup
298+ self .options : Namespace
299+ self .user : User
300+ super ().__init__ (option , name , parent , user , options = options )
301+
302+ def fetch (self ) -> None :
303+ raise NotImplementedError ()
304+
305+
306+ class PageCreated (ConfluenceStats ):
256307 """ Created pages """
257308
258- def fetch (self ):
309+ def fetch (self ) -> None :
259310 log .info ("Searching for pages created by %s" , self .user )
260311 query = (
261312 f"type=page AND creator = '{ self .user .login } ' "
@@ -270,10 +321,10 @@ def fetch(self):
270321 ]
271322
272323
273- class PageModified (Stats ):
324+ class PageModified (ConfluenceStats ):
274325 """ Modified pages """
275326
276- def fetch (self ):
327+ def fetch (self ) -> None :
277328 log .info ("Searching for pages modified by %s" , self .user )
278329 query = (
279330 f"type=page AND contributor = '{ self .user .login } ' "
@@ -304,8 +355,10 @@ def fetch(self):
304355 ]
305356
306357
307- class CommentAdded (Stats ):
308- def fetch (self ):
358+ class CommentAdded (ConfluenceStats ):
359+ """ Commented pages """
360+
361+ def fetch (self ) -> None :
309362 log .info ("Searching for comments added by %s" , self .user )
310363 query = (
311364 f"type=comment AND creator = '{ self .user .login } ' "
@@ -323,13 +376,14 @@ def fetch(self):
323376# Stats Group
324377# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
325378
326- class ConfluenceStats (StatsGroup ):
379+
380+ class ConfluenceStatsGroup (StatsGroup ):
327381 """ Confluence stats """
328382
329383 # Default order
330384 order = 600
331385
332- def _basic_auth (self , option , config ) :
386+ def _basic_auth (self , option : str , config : dict [ str , str ]) -> None :
333387 if "auth_username" not in config :
334388 raise ReportError (f"`auth_username` not set in the [{ option } ] section" )
335389 self .auth_username = config ["auth_username" ]
@@ -344,16 +398,16 @@ def _basic_auth(self, option, config):
344398 "`auth_password` or `auth_password_file` must be set "
345399 f"in the [{ option } ] section." )
346400
347- def _token_auth (self , option , config ) :
401+ def _token_auth (self , option : str , config : dict [ str , str ]) -> None :
348402 self .token = get_token (config )
349403 if self .token is None :
350404 raise ReportError (
351405 "The `token` or `token_file` key must be set "
352406 f"in the [{ option } ] section." )
353407 if "token_expiration" in config or "token_name" in config :
354408 try :
355- self .token_expiration = int (config ["token_expiration" ])
356- self .token_name = config ["token_name" ]
409+ self .token_expiration : Optional [ int ] = int (config ["token_expiration" ])
410+ self .token_name : Optional [ str ] = config ["token_name" ]
357411 except KeyError as key_err :
358412 raise ReportError (
359413 "The ``token_name`` and ``token_expiration`` must be set at"
@@ -363,26 +417,30 @@ def _token_auth(self, option, config):
363417 "The ``token_expiration`` must contain number, "
364418 f"used in [{ option } ] section." ) from val_err
365419 else :
366- self .token_expiration = self .token_name = None
420+ self .token_expiration = None
421+ self .token_name = None
367422
368- def _set_ssl_verification (self , config ) :
423+ def _set_ssl_verification (self , config : dict [ str , str ]) -> None :
369424 # SSL verification
370425 if "ssl_verify" in config :
371426 try :
372- self .ssl_verify = strtobool (
373- config ["ssl_verify" ])
427+ self .ssl_verify = bool (strtobool (config ["ssl_verify" ]))
374428 except Exception as error :
375429 raise ReportError (
376430 f"Error when parsing 'ssl_verify': { error } " ) from error
377431 else :
378432 self .ssl_verify = SSL_VERIFY
379433
380- def __init__ (self , option , name = None , parent = None , user = None ):
434+ def __init__ (self ,
435+ option : str ,
436+ name : Optional [str ] = None ,
437+ parent : Optional [StatsGroup ] = None ,
438+ user : Optional [User ] = None ) -> None :
381439 StatsGroup .__init__ (self , option , name , parent , user )
382- self ._session = None
440+ self ._session : Optional [ requests . Session ] = None
383441 # Make sure there is an url provided
384442 config = dict (Config ().section (option ))
385- self .timeout = config .get ("timeout" , TIMEOUT )
443+ self .timeout : float = float ( config .get ("timeout" , TIMEOUT ) )
386444 if "url" not in config :
387445 raise ReportError (f"No Confluence url set in the [{ option } ] section" )
388446 self .url = config ["url" ].rstrip ("/" )
@@ -413,6 +471,7 @@ def __init__(self, option, name=None, parent=None, user=None):
413471 f" basic authentication (section [{ option } ])" )
414472 # Token
415473 self .token_expiration = None
474+ self .token_name = None
416475 if self .auth_type == "token" :
417476 self ._token_auth (option , config )
418477 self ._set_ssl_verification (config )
@@ -425,22 +484,28 @@ def __init__(self, option, name=None, parent=None, user=None):
425484 PageCreated (
426485 option = f"{ option } -pages-created" ,
427486 parent = self ,
487+ user = self .user ,
488+ options = self .options ,
428489 name = f"Pages created in { option } " ),
429490 PageModified (
430491 option = f"{ option } -pages-updated" ,
431492 parent = self ,
493+ user = self .user ,
494+ options = self .options ,
432495 name = f"Pages updated in { option } " ),
433496 CommentAdded (
434497 option = f"{ option } -comments" ,
435498 parent = self ,
499+ user = self .user ,
500+ options = self .options ,
436501 name = f"Comments added in { option } " ),
437502 ]
438503
439- def _basic_auth_session (self ):
504+ def _basic_auth_session (self ) -> requests . Response :
440505 log .debug ("Connecting to %s for basic auth" , self .auth_url )
441506 basic_auth = (self .auth_username , self .auth_password )
442507 try :
443- response = self ._session .get (
508+ response = self .session .get (
444509 self .auth_url , auth = basic_auth , verify = self .ssl_verify ,
445510 timeout = self .timeout )
446511 except (requests .exceptions .ConnectionError ,
@@ -452,12 +517,12 @@ def _basic_auth_session(self):
452517 ) from error
453518 return response
454519
455- def _token_auth_session (self ):
520+ def _token_auth_session (self ) -> requests . Response :
456521 log .debug ("Connecting to %s/rest/api/content for token auth" , self .url )
457522 self .session .headers ["Authorization" ] = f"Bearer { self .token } "
458523 while True :
459524 try :
460- response = self ._session .get (
525+ response = self .session .get (
461526 f"{ self .url } /rest/api/content" ,
462527 verify = self .ssl_verify ,
463528 timeout = self .timeout )
@@ -475,11 +540,13 @@ def _token_auth_session(self):
475540 break
476541 return response
477542
478- def _gss_api_auth_session (self ):
543+ def _gss_api_auth_session (self ) -> requests .Response :
544+ if self ._session is None :
545+ raise RuntimeError ("Session has not been initialized" )
479546 log .debug ("Connecting to %s for gssapi auth" , self .auth_url )
480547 gssapi_auth = HTTPSPNEGOAuth (mutual_authentication = DISABLED )
481548 try :
482- response = self ._session .get (
549+ response : requests . Response = self ._session .get (
483550 self .auth_url , auth = gssapi_auth , verify = self .ssl_verify ,
484551 timeout = self .timeout )
485552 except (requests .exceptions .ConnectionError ,
@@ -491,8 +558,12 @@ def _gss_api_auth_session(self):
491558 ) from error
492559 return response
493560
561+ def renew_session (self ) -> requests .Session :
562+ self ._session = None
563+ return self .session
564+
494565 @property
495- def session (self ):
566+ def session (self ) -> requests . Session :
496567 """ Initialize the session """
497568 # pylint: disable=too-many-branches
498569 if self ._session is not None :
@@ -508,7 +579,7 @@ def session(self):
508579 response = self ._token_auth_session ()
509580 else :
510581 response = self ._gss_api_auth_session ()
511- if response .status_code == 429 :
582+ if response .status_code == HTTPStatus . TOO_MANY_REQUESTS :
512583 retry_after = 1
513584 if response .headers .get ("X-RateLimit-Remaining" ) == "0" :
514585 retry_after = max (int (response .headers ["retry-after" ]), 1 )
0 commit comments