Skip to content

Commit 1632f8b

Browse files
committed
browser: make outlink gathering reusable
The outlink JS always executes its extraction function as soon as it is evaluated, which means we can't safely inject it if we want other JavaScript to be able to execute outlink extraction. This updates the script to instead expose a single outlink-gathering function, which the outlink gatherer then calls explicitly after injecting the JS.
1 parent 47f5c06 commit 1632f8b

File tree

2 files changed

+18
-2
lines changed

2 files changed

+18
-2
lines changed

brozzler/browser.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -770,12 +770,26 @@ def extract_outlinks(self, timeout=60):
770770
self.logger.info("extracting outlinks")
771771
self.websock_thread.expect_result(self._command_id.peek())
772772
js = brozzler.jinja2_environment().get_template("extract-outlinks.js").render()
773+
# This defines the method but doesn't extract outlinks yet
773774
msg_id = self.send_to_chrome(
774-
method="Runtime.evaluate", params={"expression": js}
775+
method="Runtime.evaluate",
776+
suppress_logging=True,
777+
params={"expression": js},
775778
)
776779
self._wait_for(
777780
lambda: self.websock_thread.received_result(msg_id), timeout=timeout
778781
)
782+
self.websock_thread.expect_result(self._command_id.peek())
783+
784+
# Now we actually do outlink extraction
785+
msg_id = self.send_to_chrome(
786+
method="Runtime.evaluate",
787+
params={"expression": "__brzl_outlinksString()"},
788+
)
789+
self._wait_for(
790+
lambda: self.websock_thread.received_result(msg_id), timeout=timeout
791+
)
792+
779793
message = self.websock_thread.pop_result(msg_id)
780794
if (
781795
"result" in message

brozzler/js-templates/extract-outlinks.js

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,6 @@ var __brzl_compileOutlinks = function(frame) {
3636

3737
return outlinks;
3838
}
39-
__brzl_compileOutlinks(window).join('\n');
39+
var __brzl_outlinksString = function() {
40+
return __brzl_compileOutlinks(window).join('\n');
41+
}

0 commit comments

Comments
 (0)