66import re
77import logging
88import httpx
9+ from bs4 import BeautifulSoup
910from fastapi import APIRouter , HTTPException , Path , Request
10- from app .models .umap import UMapFeatureCollection , ShowcaseResponse
11+ from app .models .umap import (
12+ UMapFeatureCollection ,
13+ ShowcaseResponse ,
14+ UserMapsResponse ,
15+ UserTemplatesResponse ,
16+ )
1117from app .core .cache import get_cached , set_cached , DEFAULT_TTL
1218from app .core .config import settings
1319
1824
# uMap HOT OSM URLs derived from settings
UMAP_BASE_URL = settings.umap_base_url
UMAP_LOCALE = settings.umap_locale
UMAP_API_BASE_URL = f"{UMAP_BASE_URL}/en/datalayer"
UMAP_SHOWCASE_URL = f"{UMAP_BASE_URL}/en/showcase/"

# SSL verification: disabled by default for .test domains (self-signed certs)
# Set UMAP_VERIFY_SSL=true in production with valid certificates
UMAP_VERIFY_SSL = os.getenv("UMAP_VERIFY_SSL", "false").lower() == "true"

# Matches /es/map/slug_123, /pt-br/map/slug_123 or /map/slug_123.
# The optional locale prefix accepts two- or three-letter language codes with an
# optional "-region"/"-script" suffix, because the locale is configurable and is
# not limited to two-letter codes.
_MAP_HREF_RE = re.compile(r"^/(?:[a-z]{2,3}(?:-[a-z0-9]+)?/)?map/(.+)$")

# Lazy %-style arguments: the message is only formatted if the record is emitted.
logger.info("uMap Base URL: %s", UMAP_BASE_URL)
logger.info("uMap Locale: %s", UMAP_LOCALE)
logger.info("uMap SSL Verification: %s", UMAP_VERIFY_SSL)
3041
3142
32- @router .get ("/user/templates" )
def _parse_map_links(html: str) -> list[dict]:
    """Extract unique map entries from an HTML page using BeautifulSoup.

    Every ``<a href=...>`` whose href matches ``_MAP_HREF_RE`` yields one entry.
    Links containing ``?share`` or ``?edit`` are skipped, and entries are
    deduplicated by map ID (first occurrence wins).

    Args:
        html: Raw HTML of a uMap page.

    Returns:
        A list of dicts with keys ``id``, ``slug``, ``href`` and ``url``.
        ``id`` is the numeric suffix after the last underscore of the slug, or
        the whole slug when there is no such numeric suffix. ``url`` is
        ``UMAP_BASE_URL`` followed by the original href.
    """
    soup = BeautifulSoup(html, "html.parser")
    results: list[dict] = []
    seen: set[str] = set()

    for anchor in soup.find_all("a", href=True):
        href: str = anchor["href"]
        if "?share" in href or "?edit" in href:
            continue
        match = _MAP_HREF_RE.match(href)
        if not match:
            continue

        # The pattern captures everything after "map/", so strip any fragment
        # and query string; otherwise they end up in the slug, hide the numeric
        # ID, and let the same map through deduplication twice.
        slug = match.group(1).split("#", 1)[0].split("?", 1)[0]
        if not slug:
            continue

        # "name_123" -> "123"; fall back to the whole slug without a numeric tail.
        _, separator, tail = slug.rpartition("_")
        map_id = tail if separator and tail.isdigit() else slug
        if map_id in seen:
            continue
        seen.add(map_id)
        results.append(
            {"id": map_id, "slug": slug, "href": href, "url": f"{UMAP_BASE_URL}{href}"}
        )

    return results
71+
72+
def _check_login_redirect(response: httpx.Response, html: str) -> bool:
    """Tell whether uMap answered with its login page instead of the requested one.

    Args:
        response: The final response after redirects were followed.
        html: The response body as text.

    Returns:
        True when the final URL contains ``/login`` or the body contains the
        Spanish login label; False otherwise.
    """
    final_url = str(response.url)
    if "/login" in final_url:
        return True
    # NOTE(review): this body marker is Spanish-only; with a non-"es"
    # UMAP_LOCALE presumably only the URL check above can fire - confirm.
    return "Iniciar sesión" in html
76+
77+
async def _fetch_user_map_links(
    request: Request,
    *,
    page_path: str,
    log_tag: str,
    noun: str,
) -> list[dict]:
    """Fetch one of the user's uMap pages and return the map links found on it.

    Shared implementation for the ``/user/templates`` and ``/user/maps``
    endpoints, which differ only in the page they request and in their
    log/error wording. The HTML preview is logged at debug level for both.

    Args:
        request: Incoming request; its ``hanko`` cookie is forwarded to uMap.
        page_path: Path below ``{UMAP_BASE_URL}/{UMAP_LOCALE}/`` to request.
        log_tag: Tag placed in square brackets at the start of log messages.
        noun: Plural noun used in the "Found N ..." log line and in the
            upstream-error detail message.

    Returns:
        The entries produced by ``_parse_map_links`` for the fetched page.

    Raises:
        HTTPException: 401 when the cookie is missing or uMap served its login
            page; the upstream status code when uMap answers with an error
            status; 503 on connection errors; 500 on any other failure.
    """
    hanko_cookie = request.cookies.get("hanko")
    logger.info("[%s] Hanko cookie present: %s", log_tag, bool(hanko_cookie))

    if not hanko_cookie:
        logger.warning("No Hanko cookie found in request")
        raise HTTPException(
            status_code=401,
            detail="Hanko authentication cookie not found. Please log in.",
        )

    url = f"{UMAP_BASE_URL}/{UMAP_LOCALE}/{page_path}"
    logger.info("[%s] Target URL: %s", log_tag, url)

    try:
        async with httpx.AsyncClient(
            timeout=30.0,
            verify=UMAP_VERIFY_SSL,
            follow_redirects=True,
        ) as client:
            response = await client.get(
                url,
                headers={"User-Agent": "portal-umap-client/1.0"},
                cookies={"hanko": hanko_cookie},
            )
            response.raise_for_status()
            html = response.text

            logger.info("[%s] Final URL: %s", log_tag, response.url)
            logger.info("[%s] Response length: %d chars", log_tag, len(html))
            logger.debug("[%s] HTML preview: %s", log_tag, html[:500])

            if _check_login_redirect(response, html):
                logger.warning("[%s] Auth failed - redirected to login page", log_tag)
                raise HTTPException(
                    status_code=401,
                    detail="uMap authentication failed. Your session may have expired.",
                )

            links = _parse_map_links(html)
            logger.info("[%s] Found %d %s", log_tag, len(links), noun)
            return links

    except HTTPException:
        # The 401 raised above must reach the client unchanged rather than be
        # rewrapped as a 500 by the generic handler below.
        raise
    except httpx.HTTPStatusError as e:
        logger.error("HTTP Error: %s - %s", e.response.status_code, e.response.text)
        # NOTE(review): the upstream response body is echoed to the client here.
        raise HTTPException(
            status_code=e.response.status_code,
            detail=f"Error fetching uMap {noun}: {e.response.text}",
        ) from e
    except httpx.RequestError as e:
        logger.error("Request Error: %s", e)
        raise HTTPException(
            status_code=503, detail=f"Connection error to uMap: {e}"
        ) from e
    except Exception as e:
        logger.exception("Unexpected error: %s", e)
        raise HTTPException(status_code=500, detail=f"Unexpected error: {e}") from e


@router.get("/user/templates", response_model=UserTemplatesResponse)
async def get_user_templates(request: Request) -> dict:
    """Fetch the user's templates page from uMap and return a JSON list.

    Uses Hanko authentication cookie to authenticate with the uMap instance.
    Returns JSON with an array under `templates` containing objects with
    `id`, `href`, `url` and `slug` keys.

    Raises:
        HTTPException: 401 on missing or rejected authentication, the upstream
            status on uMap HTTP errors, 503 on connection errors, 500 otherwise.
    """
    templates = await _fetch_user_map_links(
        request,
        page_path="me/templates",
        log_tag="Templates",
        noun="templates",
    )
    return {"templates": templates}


@router.get("/user/maps", response_model=UserMapsResponse)
async def get_user_maps(request: Request) -> dict:
    """Fetch the user's maps page from uMap and return a JSON list.

    Uses Hanko authentication cookie to authenticate with the uMap instance.
    Returns JSON with an array under `maps` containing objects with
    `id`, `slug`, `href` and `url` keys.

    Raises:
        HTTPException: 401 on missing or rejected authentication, the upstream
            status on uMap HTTP errors, 503 on connection errors, 500 otherwise.
    """
    maps = await _fetch_user_map_links(
        request,
        page_path="me",
        log_tag="Maps",
        noun="maps",
    )
    return {"maps": maps}
201207
202208
203-
204209@router .get ("/showcase" , response_model = ShowcaseResponse )
205210async def get_showcase () -> ShowcaseResponse :
206211 """Fetch the list of featured maps from the uMap showcase page.
@@ -263,6 +268,7 @@ async def get_showcase() -> ShowcaseResponse:
263268 }
264269 set_cached (cache_key , result , DEFAULT_TTL )
265270 return result
271+
266272 except httpx .HTTPStatusError as e :
267273 logger .error (f"[Showcase] HTTP Error: { e .response .status_code } " )
268274 raise HTTPException (
@@ -286,7 +292,7 @@ async def get_showcase() -> ShowcaseResponse:
286292@router .get ("/{location}/{project_id}" , response_model = UMapFeatureCollection )
287293async def get_umap_data (
288294 location : str = Path (..., description = "Location identifier" ),
289- project_id : str = Path (..., description = "The project UUID to retrieve" )
295+ project_id : str = Path (..., description = "The project UUID to retrieve" ),
290296) -> dict :
291297 """
292298 Fetch GeoJSON data from uMap HOT OSM.
@@ -341,7 +347,7 @@ async def get_umap_data(
341347 async with httpx .AsyncClient (
342348 timeout = 30.0 ,
343349 verify = UMAP_VERIFY_SSL ,
344- follow_redirects = True
350+ follow_redirects = True ,
345351 ) as client :
346352 response = await client .get (url )
347353 response .raise_for_status ()
0 commit comments