Skip to content

Commit 3081e0f

Browse files
tw4l and ikreymer
authored
Add optional client-side playback to pywb (#928)
This PR adds optional client-side replay in pywb's framed replay mode, using wabac.js. This is implemented using wabac.js's live proxy mode, similar to Alex Osborne's proof of concept, and is enabled via the config.yaml file. Documentation has also been added. The service worker proxies to the original pywb URLs and allows for 'graceful fallback' if service workers are not supported. Client-side replay can be enabled by setting `client_side_replay: true` in config.yaml. The wabac.js service worker is added to the pywb static directory at installation time via setup.py. The wabac.js version can be bumped via a constant in that file (current version is 2.22.12). In addition, a few small housekeeping changes are also included: - The Python version in the pywb Dockerfile is updated to 3.11 to avoid using an unsupported version of Python - Similarly, CI now runs on Python 3.9-3.11 to drop older versions that are no longer supported in GH Actions runners - wombat updated to latest 2.8.10 - bump version to 2.9.0-beta.0 --------- Co-authored-by: Ilya Kreymer <[email protected]>
1 parent 4f6018d commit 3081e0f

16 files changed

+184
-13
lines changed

.github/workflows/ci.yaml

+5
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,13 @@ name: CI
22

33
on: [push, pull_request]
44

5+
concurrency:
6+
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
7+
cancel-in-progress: true
8+
59
jobs:
610
unit-tests:
11+
timeout-minutes: 20
712
runs-on: ubuntu-latest
813
strategy:
914
max-parallel: 3

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# wabac sw
2+
pywb/static/wabacSW.js
3+
14
*.py[cod]
25

36
# C extensions

Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
ARG PYTHON=python:3.8
1+
ARG PYTHON=python:3.11
22

33
FROM $PYTHON
44

config.yaml

+4
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ enable_memento: true
2727
# Replay content in an iframe
2828
framed_replay: true
2929

30+
# Use wabac.js-style client-side replay system for framed replay
31+
client_side_replay: false
32+
33+
# Enable classic redirect behavior
3034
redirect_to_exact: true
3135

3236
# Uncomment and change to set default locale

docs/manual/configuring.rst

+13
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,19 @@ To disable framed replay add:
4444

4545
Note: pywb also supports HTTP/S **proxy mode** which requires additional setup. See :ref:`https-proxy` for more details.
4646

47+
.. _client_side_replay:
48+
49+
Client-side replay
50+
------------------
51+
52+
In pywb 2.9.0+, client-side replay can optionally be enabled for framed replay.
53+
54+
To enable client-side framed replay add:
55+
56+
``client_side_replay: true`` to your config.yaml
57+
58+
In this mode, pywb will use the `wabac.js <https://github.com/webrecorder/wabac.js>`_ service worker-based replay system that underlies `ReplayWeb.page <https://replayweb.page/>`_ in live proxy mode. This may result in better replay for certain JavaScript-heavy sites.
59+
4760

4861
.. _dir_structure:
4962

pywb/apps/frontendapp.py

+16
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@ def __init__(self, config_file=None, custom_config=None):
8181

8282
self.debug = config.get('debug', False)
8383

84+
self.client_side_replay = config.get('client_side_replay', False)
85+
8486
self.warcserver_server = GeventServer(self.warcserver, port=0)
8587

8688
self.proxy_prefix = None # the URL prefix to be used for the collection with proxy mode (e.g. /coll/id_/)
@@ -130,6 +132,9 @@ def _init_routes(self):
130132
coll_prefix = '/<coll>'
131133
self.url_map.add(Rule('/', endpoint=self.serve_home))
132134

135+
if self.client_side_replay:
136+
self.url_map.add(Rule('/{0}/sw.js'.format(self.static_prefix), endpoint=self.serve_wabac_service_worker))
137+
133138
self._init_coll_routes(coll_prefix)
134139

135140
if self.proxy_prefix is not None:
@@ -818,6 +823,17 @@ def proxy_fetch(self, env, url):
818823
response.add_access_control_headers(env=env)
819824
return response
820825

826+
def serve_wabac_service_worker(self, env):
    """Return the wabac.js service worker script as a static file response.

    The script is looked up as ``wabacSW.js`` through ``serve_static`` and
    the response is tagged with a ``Service-Worker-Allowed: /`` header
    before being handed back.

    :param dict env: The WSGI environment dictionary
    :return: WbResponse with service worker
    :rtype: WbResponse
    """
    sw_response = self.serve_static(env, coll='', filepath='wabacSW.js')

    # Service-Worker-Allowed widens the maximum scope the browser will
    # accept for this worker beyond the directory the script is served from.
    sw_response.status_headers['Service-Worker-Allowed'] = '/'

    return sw_response
836+
821837

822838
# ============================================================================
823839
class MetadataCache(object):

pywb/apps/rewriterapp.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,8 @@ def __init__(self, framed_replay=False, jinja_env=None, config=None, paths=None)
8484
self._html_templ('head_insert_html'),
8585
self.custom_banner_view)
8686

87+
self.client_side_replay = self.config.get('client_side_replay', False)
88+
8789
self.frame_insert_view = TopFrameView(self.jinja_env,
8890
self._html_templ('frame_insert_html'),
8991
self.banner_view)
@@ -933,7 +935,8 @@ def handle_custom_response(self, environ, wb_url, full_prefix, host_prefix, kwar
933935
environ,
934936
self.frame_mod,
935937
self.replay_mod,
936-
coll='',
938+
self.client_side_replay,
939+
coll=kwargs.get("coll"),
937940
extra_params=extra_params)
938941

939942
return None

pywb/rewrite/templateview.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,7 @@ def get_top_frame(self, wb_url,
388388
env,
389389
frame_mod,
390390
replay_mod,
391+
client_side_replay,
391392
coll='',
392393
extra_params=None):
393394
"""
@@ -397,6 +398,7 @@ def get_top_frame(self, wb_url,
397398
:param dict env: The WSGI environment dictionary for the request this template is being rendered for
398399
:param str frame_mod: The modifier to be used for framing (e.g. if_)
399400
:param str replay_mod: The modifier to be used in the URL of the page being replayed (e.g. mp_)
401+
:param bool client_side_replay: Boolean indicating whether to use wabac.js-based client side replay
400402
:param str coll: The name of the collection this template is being rendered for
401403
:param dict extra_params: Additional parameters to be supplied to the Jinja template render method
402404
:return: The frame insert string
@@ -423,8 +425,11 @@ def get_top_frame(self, wb_url,
423425

424426
'embed_url': embed_url,
425427
'is_proxy': is_proxy,
428+
'client_side_replay': client_side_replay,
426429
'timestamp': timestamp,
427-
'url': wb_url.get_url()
430+
'url': wb_url.get_url(),
431+
432+
'sw_prefix': env.get('pywb.app_prefix', '')
428433
}
429434

430435
if extra_params:

pywb/static/loadWabac.js

+91
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
/**
 * Bootstraps wabac.js service-worker based replay for the framed replay page.
 *
 * Registers the service worker, asks it to add a "live" proxy collection,
 * and then points the replay iframe at the requested capture.
 */
class WabacReplay
{
  /**
   * @param {string} prefix - base URL sent to the worker as `baseUrl`; also
   *   used to rebuild the top-frame location on "load"/"replace-url" messages
   * @param {string} url - URL loaded into the replay iframe once init completes
   * @param {string} ts - timestamp loaded together with `url`
   * @param {string} staticPrefix - path prefix under which `sw.js` is requested
   * @param {string} coll - collection name; "$root" selects the root collection
   * @param {string} swScopePrefix - prefix of the service worker scope and of
   *   the iframe URLs built by `load_url`
   */
  constructor(prefix, url, ts, staticPrefix, coll, swScopePrefix) {
    this.prefix = prefix;
    this.url = url;
    this.ts = ts;
    this.staticPrefix = staticPrefix;
    this.collName = coll;
    this.isRoot = coll === "$root";
    this.archivePrefix = this.isRoot ? "/" : `/${this.collName}/`;
    this.swScope = swScopePrefix;
    this.adblockUrl = undefined;

    // query string appended to the service worker script URL
    this.queryParams = {"replayPrefix": ""};
    if (this.isRoot) {
      this.queryParams["root"] = "$root";
    }
  }

  /**
   * Register the service worker, add the proxy collection and load the
   * initial URL once the worker reports the collection as added.
   *
   * @returns {Promise<void>}
   */
  async init() {
    const scope = this.swScope + "/";

    await navigator.serviceWorker.register(
      `${this.staticPrefix}/sw.js?` + new URLSearchParams(this.queryParams).toString(),
      { scope },
    );

    let initedResolve = null;

    const inited = new Promise((resolve) => initedResolve = resolve);

    navigator.serviceWorker.addEventListener("message", (event) => {
      // ignore messages without a payload instead of throwing on them
      if (event.data && event.data.msg_type === "collAdded") {
        // the replay is ready to be loaded when this message is received
        initedResolve();
      }
    });

    const proxyPrefix = "";

    const msg = {
      msg_type: "addColl",
      name: this.collName,
      type: "live",
      root: this.isRoot,
      file: {"sourceUrl": `proxy:${proxyPrefix}`},
      skipExisting: true,
      extraConfig: {
        prefix: proxyPrefix,
        isLive: false,
        baseUrl: this.prefix,
        baseUrlAppendReplay: true,
        // Previously listed twice (false, then true); in an object literal
        // the last duplicate wins, so `true` is the value that was in effect.
        noPostToGet: true,
        archivePrefix: this.archivePrefix,
        archiveMod: "ir_",
        adblockUrl: this.adblockUrl,
      },
    };

    if (!navigator.serviceWorker.controller) {
      // no worker controls this page yet: send the message once one takes over
      navigator.serviceWorker.addEventListener("controllerchange", () => {
        navigator.serviceWorker.controller.postMessage(msg);
      });
    } else {
      navigator.serviceWorker.controller.postMessage(msg);
    }

    window.addEventListener("message", event => {
      const data = event.data;
      if (window.WBBanner) {
        window.WBBanner.onMessage(event);
      }
      // guard against null/undefined payloads posted to the top frame
      if (data && (data.wb_type === "load" || data.wb_type === "replace-url")) {
        history.replaceState({}, data.title, this.prefix + data.ts + '/' + data.url);
      }
    });

    // `inited` is always a Promise here, so it can be awaited unconditionally
    await inited;

    this.load_url(this.url, this.ts);
  }

  /**
   * Point the replay iframe at the given capture.
   * Called by the Vue banner when the timeline is clicked.
   *
   * @param {string} url - URL to replay
   * @param {string} ts - timestamp to replay
   */
  load_url(url, ts) {
    const iframe = document.querySelector('#replay_iframe');
    iframe.src = `${this.swScope}${this.archivePrefix}${ts}mp_/${url}`;
  }
}

pywb/static/wombat.js

+2-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)