Skip to content

Commit 749eed3

Browse files
committed
feat: add Confluence integration with authentication and file loading capabilities
- Enhanced settings.py to include Confluence client ID and secret
- Created ConfluenceAuth class for handling authentication with Confluence
- Implemented ConfluenceLoader class for loading data from Confluence
- Updated connector_creator.py to register Confluence as a connector
- Added confluence.svg asset for UI representation
- Modified ConnectorAuth component to support Confluence connection
- Updated FilePicker component to include Confluence as a file source
- Added localization support for Confluence in multiple languages (de, en, es, jp, ru, zh-TW, zh)
- Enhanced Upload component to handle Confluence file selection
- Updated ingestor types to include Confluence and its configuration
1 parent 23aeaff commit 749eed3

18 files changed

Lines changed: 748 additions & 8 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,5 +181,6 @@ application/vectors/
181181

182182
node_modules/
183183
.vscode/settings.json
184+
.vscode/sftp.json
184185
/models/
185186
model/

application/core/settings.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,10 @@ class Settings(BaseSettings):
5959
MICROSOFT_TENANT_ID: Optional[str] = "common" # Azure AD Tenant ID (or 'common' for multi-tenant)
6060
MICROSOFT_AUTHORITY: Optional[str] = None # e.g., "https://login.microsoftonline.com/{tenant_id}"
6161

62+
# Confluence Cloud integration
63+
CONFLUENCE_CLIENT_ID: Optional[str] = None
64+
CONFLUENCE_CLIENT_SECRET: Optional[str] = None
65+
6266
# GitHub source
6367
GITHUB_ACCESS_TOKEN: Optional[str] = None # PAT token with read repo access
6468

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
from .auth import ConfluenceAuth
2+
from .loader import ConfluenceLoader
3+
4+
__all__ = ["ConfluenceAuth", "ConfluenceLoader"]
Lines changed: 218 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,218 @@
1+
import datetime
2+
import logging
3+
from typing import Any, Dict, Optional
4+
from urllib.parse import urlencode
5+
6+
import requests
7+
8+
from application.core.settings import settings
9+
from application.parser.connectors.base import BaseConnectorAuth
10+
11+
logger = logging.getLogger(__name__)
12+
13+
14+
class ConfluenceAuth(BaseConnectorAuth):
    """OAuth 2.0 authorization-code helper for the Confluence Cloud connector.

    Builds the Atlassian consent URL, exchanges authorization codes and
    refresh tokens for access tokens, resolves the Atlassian cloud id of the
    first accessible site, and validates token information stored in
    connector sessions.
    """

    # "offline_access" is requested so the token endpoint issues a refresh
    # token; exchange_code_for_tokens() treats a missing one as an error.
    SCOPES = [
        "read:page:confluence",
        "read:space:confluence",
        "read:attachment:confluence",
        "read:me",
        "offline_access",
    ]

    AUTH_URL = "https://auth.atlassian.com/authorize"
    TOKEN_URL = "https://auth.atlassian.com/oauth/token"
    RESOURCES_URL = "https://api.atlassian.com/oauth/token/accessible-resources"
    ME_URL = "https://api.atlassian.com/me"

    # Timeout, in seconds, applied to every HTTP call made by this class.
    _REQUEST_TIMEOUT = 30
    # Token lifetime, in seconds, assumed when the response omits "expires_in".
    _DEFAULT_EXPIRES_IN = 3600
    # A token is reported as expired this many seconds before its real expiry.
    _EXPIRY_LEEWAY_SECONDS = 60

    def __init__(self):
        """Read OAuth client settings.

        Raises:
            ValueError: If the Confluence client id or secret is not set.
        """
        self.client_id = settings.CONFLUENCE_CLIENT_ID
        self.client_secret = settings.CONFLUENCE_CLIENT_SECRET
        self.redirect_uri = settings.CONNECTOR_REDIRECT_BASE_URI

        if not self.client_id or not self.client_secret:
            raise ValueError(
                "Confluence OAuth credentials not configured. "
                "Please set CONFLUENCE_CLIENT_ID and CONFLUENCE_CLIENT_SECRET in settings."
            )

    def get_authorization_url(self, state: Optional[str] = None) -> str:
        """Return the Atlassian consent URL the user should be redirected to.

        Args:
            state: Opaque value echoed back on the redirect. When ``None`` the
                parameter is omitted from the URL.

        Returns:
            The fully encoded authorization URL.
        """
        params = {
            "audience": "api.atlassian.com",
            "client_id": self.client_id,
            "scope": " ".join(self.SCOPES),
            "redirect_uri": self.redirect_uri,
            "response_type": "code",
            "prompt": "consent",
        }
        # urlencode() would otherwise serialise None as the literal "None".
        if state is not None:
            params["state"] = state
        return f"{self.AUTH_URL}?{urlencode(params)}"

    def exchange_code_for_tokens(self, authorization_code: str) -> Dict[str, Any]:
        """Exchange an authorization code for access and refresh tokens.

        Args:
            authorization_code: Code received on the OAuth redirect.

        Returns:
            Token information with the keys ``access_token``,
            ``refresh_token``, ``token_uri``, ``scopes``, ``expiry`` (ISO 8601,
            UTC), ``cloud_id`` and ``user_info`` (``name`` and ``email``).

        Raises:
            ValueError: If the code is empty, the response lacks an access or
                refresh token, or no accessible site is found.
            requests.HTTPError: If an HTTP call returns an error status.
        """
        if not authorization_code:
            raise ValueError("Authorization code is required")

        token_data = self._request_tokens(
            {
                "grant_type": "authorization_code",
                "code": authorization_code,
                "redirect_uri": self.redirect_uri,
            }
        )

        access_token = token_data.get("access_token")
        if not access_token:
            raise ValueError("OAuth flow did not return an access token")

        refresh_token = token_data.get("refresh_token")
        if not refresh_token:
            raise ValueError("OAuth flow did not return a refresh token")

        expiry = self._compute_expiry(token_data)
        cloud_id = self._fetch_cloud_id(access_token)
        user_info = self._fetch_user_info(access_token)

        return {
            "access_token": access_token,
            "refresh_token": refresh_token,
            "token_uri": self.TOKEN_URL,
            "scopes": self.SCOPES,
            "expiry": expiry,
            "cloud_id": cloud_id,
            "user_info": {
                # The Atlassian user identity payload documents the display
                # name under "name"; "display_name" is kept as a fallback.
                "name": user_info.get("name") or user_info.get("display_name", ""),
                "email": user_info.get("email", ""),
            },
        }

    def refresh_access_token(self, refresh_token: str) -> Dict[str, Any]:
        """Obtain a new access token from a refresh token.

        Args:
            refresh_token: Refresh token from a previous token response.

        Returns:
            Token information with the keys ``access_token``,
            ``refresh_token``, ``token_uri``, ``scopes``, ``expiry`` and
            ``cloud_id``.

        Raises:
            ValueError: If the refresh token is empty, the response lacks an
                access token, or no accessible site is found.
            requests.HTTPError: If an HTTP call returns an error status.
        """
        if not refresh_token:
            raise ValueError("Refresh token is required")

        token_data = self._request_tokens(
            {
                "grant_type": "refresh_token",
                "refresh_token": refresh_token,
            }
        )

        access_token = token_data.get("access_token")
        if not access_token:
            # Fail here instead of sending "Bearer None" to the resources API.
            raise ValueError("Token refresh did not return an access token")

        # Keep the previous refresh token when the response carries no new one.
        new_refresh_token = token_data.get("refresh_token", refresh_token)

        return {
            "access_token": access_token,
            "refresh_token": new_refresh_token,
            "token_uri": self.TOKEN_URL,
            "scopes": self.SCOPES,
            "expiry": self._compute_expiry(token_data),
            "cloud_id": self._fetch_cloud_id(access_token),
        }

    def is_token_expired(self, token_info: Dict[str, Any]) -> bool:
        """Report whether the stored access token should be refreshed.

        Args:
            token_info: Stored token information; may be empty or ``None``.

        Returns:
            ``True`` when there is no usable token, the expiry cannot be
            parsed, or the token expires within the leeway window. Without an
            ``expiry`` value the token counts as valid exactly when an
            ``access_token`` is present.
        """
        if not token_info:
            return True

        expiry = token_info.get("expiry")
        if not expiry:
            # No expiry recorded: only the presence of a token can be checked.
            return not token_info.get("access_token")

        if isinstance(expiry, datetime.datetime):
            expiry_dt = expiry
        else:
            from dateutil import parser

            try:
                expiry_dt = parser.parse(expiry)
            except (ValueError, OverflowError, TypeError):
                logger.warning("Could not parse token expiry; treating token as expired")
                return True

        # Timestamps written by this class are UTC; assume the same for naive
        # values so the comparison below cannot raise.
        if expiry_dt.tzinfo is None:
            expiry_dt = expiry_dt.replace(tzinfo=datetime.timezone.utc)

        now = datetime.datetime.now(datetime.timezone.utc)
        leeway = datetime.timedelta(seconds=self._EXPIRY_LEEWAY_SECONDS)
        return now >= expiry_dt - leeway

    def get_token_info_from_session(self, session_token: str) -> Dict[str, Any]:
        """Load and validate token information stored for a connector session.

        Args:
            session_token: Token identifying a ``connector_sessions`` document.

        Returns:
            The ``token_info`` dictionary stored on the session.

        Raises:
            ValueError: If the session does not exist, has no token
                information, or lacks ``access_token``, ``refresh_token`` or
                ``cloud_id``.
        """
        from application.core.mongo_db import MongoDB

        mongo = MongoDB.get_client()
        db = mongo[settings.MONGO_DB_NAME]

        session = db["connector_sessions"].find_one({"session_token": session_token})
        if not session:
            # The session token is a credential; keep it out of error
            # messages, which may end up in logs or API responses.
            raise ValueError("Invalid session token")

        token_info = session.get("token_info")
        if not token_info:
            raise ValueError("Session missing token information")

        required = ("access_token", "refresh_token", "cloud_id")
        missing = [field for field in required if not token_info.get(field)]
        if missing:
            raise ValueError(f"Missing required token fields: {missing}")

        return token_info

    def sanitize_token_info(
        self, token_info: Dict[str, Any], **extra_fields
    ) -> Dict[str, Any]:
        """Delegate to the base implementation, also forwarding ``cloud_id``.

        Args:
            token_info: Token information to sanitize.
            **extra_fields: Additional fields forwarded to the base class. An
                explicit ``cloud_id`` here takes precedence over the one in
                ``token_info``.

        Returns:
            Whatever the base class ``sanitize_token_info`` returns.
        """
        # setdefault avoids a duplicate-keyword TypeError when the caller
        # already supplies cloud_id.
        extra_fields.setdefault("cloud_id", token_info.get("cloud_id"))
        return super().sanitize_token_info(token_info, **extra_fields)

    def _request_tokens(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """POST a grant payload plus client credentials to the token endpoint."""
        response = requests.post(
            self.TOKEN_URL,
            json={
                **payload,
                "client_id": self.client_id,
                "client_secret": self.client_secret,
            },
            headers={"Content-Type": "application/json"},
            timeout=self._REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response.json()

    def _compute_expiry(self, token_data: Dict[str, Any]) -> str:
        """Return the ISO 8601 UTC expiry implied by ``expires_in``."""
        expires_in = token_data.get("expires_in", self._DEFAULT_EXPIRES_IN)
        expires_at = datetime.datetime.now(
            datetime.timezone.utc
        ) + datetime.timedelta(seconds=expires_in)
        return expires_at.isoformat()

    def _fetch_cloud_id(self, access_token: str) -> str:
        """Return the id of the first site the access token can reach.

        Raises:
            ValueError: If no site is listed or the first entry has no id.
            requests.HTTPError: If the request returns an error status.
        """
        response = requests.get(
            self.RESOURCES_URL,
            headers={
                "Authorization": f"Bearer {access_token}",
                "Accept": "application/json",
            },
            timeout=self._REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        resources = response.json()

        if not resources:
            raise ValueError("No accessible Confluence sites found for this account")

        # NOTE(review): only the first listed site is used; accounts with
        # access to several sites cannot choose one here.
        first_site = resources[0] if isinstance(resources, list) else None
        cloud_id = first_site.get("id") if isinstance(first_site, dict) else None
        if not cloud_id:
            raise ValueError("Accessible resources response did not include a site id")
        return cloud_id

    def _fetch_user_info(self, access_token: str) -> Dict[str, Any]:
        """Return the current user's profile, or ``{}`` if it cannot be read.

        This is best-effort: failures are logged and never propagate, so a
        missing profile does not abort the OAuth flow.
        """
        try:
            response = requests.get(
                self.ME_URL,
                headers={
                    "Authorization": f"Bearer {access_token}",
                    "Accept": "application/json",
                },
                timeout=self._REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            profile = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a response body that is not valid JSON.
            logger.warning("Could not fetch user info: %s", e)
            return {}

        # Callers use .get() on the result, so anything but a dict is unusable.
        return profile if isinstance(profile, dict) else {}

0 commit comments

Comments
 (0)