Skip to content

Commit cc64fb0

Browse files
committed
outlinks: simplify outlink parsing
The outlinks are collected as HTMLAnchorElement objects. The previous version stringified them by joining the entire set of objects into a single newline-delimited string, which Python then split back apart. It is simpler to return a JSON array of strings and have Python iterate over that array directly.
1 parent 93bb1a9 commit cc64fb0

File tree

2 files changed

+8
-4
lines changed

2 files changed

+8
-4
lines changed

brozzler/browser.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -784,7 +784,11 @@ def extract_outlinks(self, timeout=60):
784784
# Now we actually do outlink extraction
785785
msg_id = self.send_to_chrome(
786786
method="Runtime.evaluate",
787-
params={"expression": "__brzl_outlinksString()"},
787+
params={
788+
"expression": "__brzl_extractOutlinks()",
789+
# returnByValue ensures we can receive an array response
790+
"returnByValue": True,
791+
},
788792
)
789793
self._wait_for(
790794
lambda: self.websock_thread.received_result(msg_id), timeout=timeout
@@ -798,7 +802,7 @@ def extract_outlinks(self, timeout=60):
798802
):
799803
if message["result"]["result"]["value"]:
800804
out = []
801-
for link in message["result"]["result"]["value"].split("\n"):
805+
for link in message["result"]["result"]["value"]:
802806
try:
803807
out.append(str(urlcanon.whatwg(link)))
804808
except AddressValueError:

brozzler/js-templates/extract-outlinks.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,6 @@ var __brzl_compileOutlinks = function(frame) {
3636

3737
return outlinks;
3838
}
39-
var __brzl_outlinksString = function() {
40-
return __brzl_compileOutlinks(window).join('\n');
39+
var __brzl_extractOutlinks = function() {
40+
return __brzl_compileOutlinks(window).map(el => el.toString());
4141
}

0 commit comments

Comments
 (0)