Skip to content

Commit a8af8d4

Browse files
committed
Switched to _detected_technologies for BigQuery
For #29
1 parent fab139a commit a8af8d4

File tree

1 file changed

+16
-34
lines changed

1 file changed

+16
-34
lines changed

HTTPArchive/httparchive.py

+16 -34
Original file line number | Diff line number | Diff line change
@@ -222,41 +222,23 @@ def get_technologies(page):
222222
if not page:
223223
return None
224224

225-
app_names = page.get("_detected_apps", {})
226-
categories = page.get("_detected", {})
227-
228-
# When there are no detected apps, it appears as an empty array.
229-
if isinstance(app_names, list):
230-
app_names = {}
231-
categories = {}
232-
233225
technologies = {}
234-
app_map = {}
235-
for app, info_list in app_names.items():
236-
if not info_list:
237-
continue
238-
239-
# There may be multiple info values. Add each to the map.
240-
for info in info_list.split(","):
241-
app_id = f"{app} {info}" if len(info) > 0 else app
242-
app_map[app_id] = app
243-
244-
for category, apps in categories.items():
245-
for app_id in apps.split(","):
246-
app = app_map.get(app_id)
247-
info = ""
248-
if app is None:
249-
app = app_id
250-
else:
251-
info = app_id[len(app):].strip()
252-
253-
technologies[app] = technologies.get(
254-
app, {"technology": app, "info": [], "categories": []}
255-
)
256-
257-
technologies.get(app).get("info").append(info)
258-
if category not in technologies.get(app).get("categories"):
259-
technologies.get(app).get("categories").append(category)
226+
try:
227+
detected = page.get("_detected_technologies", {})
228+
for id in detected:
229+
entry = detected[id]
230+
if 'name' in entry:
231+
name = entry['name']
232+
if name not in technologies:
233+
technologies[name] = {"technology": name, "info": [], "categories": []}
234+
if 'version' in entry and len(entry['version'].strip()):
235+
technologies[name]['info'].append(entry['version'].strip())
236+
if 'categories' in technologies[name]:
237+
for cat in technologies[name]['categories']:
238+
if 'name' in cat:
239+
technologies[name]['categories'].append(cat['name'].strip())
240+
except Exception:
241+
logging.exception("Error processing technologies")
260242

261243
return list(technologies.values())
262244

0 commit comments

Comments
 (0)