Skip to content

Commit bacee4f

Browse files
committed
Implement access rule date support
1 parent eabf806 commit bacee4f

5 files changed

Lines changed: 70 additions & 17 deletions

File tree

pywb/warcserver/access_checker.py

Lines changed: 42 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,31 @@ def check_embargo(self, url, ts):
173173
actual = datetime.now(timezone.utc) - older
174174
return access if actual > dt else None
175175

176+
def check_date_access(
    self, ts, access, default_access, rule
):
    """Return access based on the date fields in an access rule.

    If the rule has no date fields (or there is no rule), ``access`` is
    returned unchanged. If the rule has a ``before`` and/or ``after``
    field, ``access`` is returned only when the capture timestamp
    satisfies every date field present; otherwise ``default_access``
    is returned.

    Comparisons are strict: the capture must be earlier than ``before``
    and later than ``after``.

    :param ts: capture timestamp, parsed with ``timestamp_to_datetime``
    :param access: access value to return when the date conditions hold
    :param default_access: access value to return when a date condition
        exists and is not met
    :param rule: the matched access rule (dict-like), may be ``None``
        or empty
    :return: ``access`` or ``default_access``
    """
    if not rule:
        return access

    before_ts = rule.get('before')
    after_ts = rule.get('after')

    # Rules without date fields need no timestamp parsing at all
    if not before_ts and not after_ts:
        return access

    dt = timestamp_to_datetime(ts, tz_aware=True)

    # Each date field present must hold; a rule carrying both 'before'
    # and 'after' describes a range, so neither may be skipped.
    if before_ts:
        before = timestamp_to_datetime(before_ts, tz_aware=True)
        if dt >= before:
            return default_access

    if after_ts:
        after = timestamp_to_datetime(after_ts, tz_aware=True)
        if dt <= after:
            return default_access

    return access
200+
176201
def create_access_aggregator(self, source_files):
177202
"""Creates a new AccessRulesAggregator using the supplied list
178203
of access control file names
@@ -300,10 +325,7 @@ def wrap_iter(self, cdx_iter, acl_user):
300325
:param str acl_user: The user associated with this request (optional)
301326
:return: The wrapped cdx object iterator
302327
"""
303-
last_rule = None
304-
last_url = None
305-
last_user = None
306-
rule = None
328+
default_access = self.default_rule['access']
307329

308330
for cdx in cdx_iter:
309331
url = cdx.get('url')
@@ -314,16 +336,17 @@ def wrap_iter(self, cdx_iter, acl_user):
314336
yield cdx
315337
continue
316338

339+
rule = None
317340
access = None
341+
318342
if self.aggregator:
319-
# TODO: optimization until date range support is included
320-
if url == last_url and acl_user == last_user:
321-
rule = last_rule
322-
else:
323-
rule = self.find_access_rule(url, timestamp,
324-
cdx.get('urlkey'),
325-
cdx.get('source-coll'),
326-
acl_user)
343+
rule = self.find_access_rule(
344+
url,
345+
timestamp,
346+
cdx.get('urlkey'),
347+
cdx.get('source-coll'),
348+
acl_user
349+
)
327350

328351
access = rule.get('access', 'exclude')
329352

@@ -332,18 +355,20 @@ def wrap_iter(self, cdx_iter, acl_user):
332355
if embargo_access and embargo_access != 'allow':
333356
access = embargo_access
334357

358+
# Allow more specific rules to override embargoes
359+
if access != 'exclude':
360+
access = self.check_date_access(
361+
timestamp, access, default_access, rule
362+
)
363+
335364
if access == 'exclude':
336365
continue
337366

338367
if not access:
339-
access = self.default_rule['access']
368+
access = default_access
340369

341370
if access == 'allow_ignore_embargo':
342371
access = 'allow'
343372

344373
cdx['access'] = access
345374
yield cdx
346-
347-
last_rule = rule
348-
last_url = url
349-
last_user = acl_user

sample_archive/access/after.aclj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
org,iana)/ - {"access": "allow", "url": "http://www.iana.org/", "after": "20140126"}

sample_archive/access/before.aclj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
org,iana)/ - {"access": "allow", "url": "http://www.iana.org/", "before": "20140126"}

tests/config_test_access.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,20 @@ collections:
6262
acl_paths:
6363
- ./sample_archive/access/pywb.aclj
6464

65+
pywb-acl-before:
66+
index_paths: ./sample_archive/cdx/
67+
archive_paths: ./sample_archive/warcs/
68+
default_access: block
69+
acl_paths:
70+
- ./sample_archive/access/before.aclj
71+
72+
pywb-acl-after:
73+
index_paths: ./sample_archive/cdx/
74+
archive_paths: ./sample_archive/warcs/
75+
default_access: block
76+
acl_paths:
77+
- ./sample_archive/access/after.aclj
78+
6579
pywb-wildcard-surt:
6680
index_paths: ./sample_archive/cdx/
6781
archive_paths: ./sample_archive/warcs/

tests/test_acl.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,3 +102,15 @@ def test_allow_all_acl_user_specific(self):
102102
assert 'Access Blocked' in resp.text
103103

104104
resp = self.testapp.get('/pywb-wildcard-surt/mp_/http://example.com/', headers={"X-Pywb-Acl-User": "staff"}, status=200)
105+
106+
def test_acl_before(self):
    """Check the ``pywb-acl-before`` collection.

    The 20140127171238 capture must be answered with a 451 and the
    'Access Blocked' page, while the 20140126200624 capture must be
    served with a 200.
    """
    blocked_path = '/pywb-acl-before/20140127171238mp_/http://www.iana.org/'
    allowed_path = '/pywb-acl-before/20140126200624mp_/http://www.iana.org/'

    blocked = self.testapp.get(blocked_path, status=451)
    assert 'Access Blocked' in blocked.text

    # The status check is done by the test client via ``status=200``
    self.testapp.get(allowed_path, status=200)
111+
112+
def test_acl_after(self):
    """Check the ``pywb-acl-after`` collection.

    The 20140126200624 capture must be answered with a 451 and the
    'Access Blocked' page, while the 20140127171238 capture must be
    served with a 200.
    """
    blocked_path = '/pywb-acl-after/20140126200624mp_/http://www.iana.org/'
    allowed_path = '/pywb-acl-after/20140127171238mp_/http://www.iana.org/'

    blocked = self.testapp.get(blocked_path, status=451)
    assert 'Access Blocked' in blocked.text

    # The status check is done by the test client via ``status=200``
    self.testapp.get(allowed_path, status=200)

0 commit comments

Comments
 (0)