1
- from cdx_toolkit .cli import main
2
-
3
1
import json
4
2
import sys
3
+ import os
4
+ import platform
5
+ import logging
5
6
6
7
import pytest
7
- import requests
8
+
9
+ from cdx_toolkit .cli import main
10
+
11
LOGGER = logging.getLogger(__name__)


def slow_ci():
    '''
    Report whether the network-heavy tests should be cut short.

    For Github Actions, the windows and macos runners are very slow.
    Detect those runners, so that we can cut testing short.

    Testing is limited when any of the following holds:

    * the FAKE_GITHUB_ACTION environment variable is set to a non-empty
      value (a manual override for exercising the limited path);
    * GITHUB_ACTION is set and platform.system() is Darwin or Windows;
    * GITHUB_ACTION is set and the interpreter is a Python 3 other
      than 3.12.

    Returns True when testing should be limited, False for a full run.
    The decision is logged on every call.
    '''
    if os.environ.get('FAKE_GITHUB_ACTION'):
        LOGGER.error('limiting pytest because FAKE_GITHUB_ACTION')
        return True

    if os.environ.get('GITHUB_ACTION'):
        if platform.system() in {'Darwin', 'Windows'}:
            LOGGER.error('limiting pytest because GITHUB_ACTION')
            return True

        v = sys.version_info
        if v.major == 3 and v.minor != 12:
            LOGGER.error('limiting pytest because GITHUB_ACTION and py != 3.12')
            # This branch used to return False, contradicting the message
            # above and leaving the limit unapplied.
            return True

    LOGGER.error('full pytest')
    # Explicit False rather than an implicit None, so the result is a bool.
    return False
8
31
9
32
10
33
def test_basics (capsys ):
@@ -80,6 +103,8 @@ def test_multi_cc1(capsys, caplog):
80
103
81
104
for t in tests :
82
105
multi_helper (t , capsys , caplog )
106
+ if slow_ci ():
107
+ break
83
108
84
109
85
110
def test_multi_cc2 (capsys , caplog ):
@@ -101,9 +126,10 @@ def test_multi_cc2(capsys, caplog):
101
126
102
127
for t in tests :
103
128
multi_helper (t , capsys , caplog )
129
+ if slow_ci ():
130
+ break
104
131
105
132
106
- @pytest .mark .skip (reason = 'needs some ratelimit love XXX' )
107
133
def test_multi_ia (capsys , caplog ):
108
134
tests = [
109
135
[{'service' : '--ia' , 'mods' : '--limit 10' , 'cmd' : 'iter' , 'rest' : 'commoncrawl.org/*' },
@@ -120,12 +146,11 @@ def test_multi_ia(capsys, caplog):
120
146
121
147
for t in tests :
122
148
multi_helper (t , capsys , caplog )
149
+ break # XXX minimize IA for ratelimit purposes
123
150
124
151
125
- def test_multi_misc_notia (capsys , caplog ):
152
+ def test_multi_misc_not_ia (capsys , caplog ):
126
153
tests = [
127
- [{'service' : '--source https://web.archive.org/cdx/search/cdx' , 'mods' : '--limit 10' , 'cmd' : 'iter' , 'rest' : 'commoncrawl.org/*' },
128
- {'count' : 10 , 'linefgrep' : 'commoncrawl.org' }],
129
154
[{'service' : '-v -v --source https://web.arc4567hive.org/cdx/search/cdx' , 'mods' : '--limit 10' , 'cmd' : 'iter' , 'rest' : 'commoncrawl.org/*' },
130
155
{'exception' : ValueError }],
131
156
[{'service' : '-v -v --source https://example.com/404' , 'mods' : '--limit 10' , 'cmd' : 'iter' , 'rest' : 'commoncrawl.org/*' },
@@ -142,11 +167,14 @@ def test_multi_misc_notia(capsys, caplog):
142
167
143
168
for t in tests :
144
169
multi_helper (t , capsys , caplog )
170
+ if slow_ci ():
171
+ break
145
172
146
173
147
- @pytest .mark .skip (reason = 'needs some ratelimit love XXX' )
148
174
def test_multi_misc_ia (capsys , caplog ):
149
175
tests = [
176
+ [{'service' : '--source https://web.archive.org/cdx/search/cdx' , 'mods' : '--limit 10' , 'cmd' : 'iter' , 'rest' : 'commoncrawl.org/*' },
177
+ {'count' : 10 , 'linefgrep' : 'commoncrawl.org' }],
150
178
[{'service' : '--ia' , 'mods' : '--limit 10' , 'cmd' : 'size' , 'rest' : 'commoncrawl.org/*' },
151
179
{'count' : 1 , 'is_int' : True }],
152
180
[{'service' : '--ia' , 'mods' : '--limit 10' , 'cmd' : 'size' , 'rest' : '--details commoncrawl.org/*' },
@@ -157,35 +185,44 @@ def test_multi_misc_ia(capsys, caplog):
157
185
158
186
for t in tests :
159
187
multi_helper (t , capsys , caplog )
188
+ break # XXX minimize IA for ratelimit reasons
160
189
161
190
162
191
def test_warc(tmpdir, caplog):
    '''
    Crash test only, so far: run the warc subcommand against each index
    source and then with several option combinations. Nothing is checked
    beyond main() returning without raising.

    All commands run with tmpdir as the working directory.
    '''
    warc_args = ' --limit 1 warc commoncrawl.org/*'

    sources = (
        '-v -v --cc',  # the only source exercised when slow_ci() is true
        '--ia',
        '--cc --cc-mirror https://index.commoncrawl.org/',
        '--source https://web.archive.org/cdx/search/cdx --wb https://web.archive.org/web',
    )
    option_sets = (
        '--prefix FOO --subprefix BAR --size 1 --creator creator --operator bob --url-fgrep common --url-fgrepv bar',
        '--prefix EMPTY --size 1 --url-fgrep bar',
        '--prefix EMPTY --size 1 --url-fgrepv common',
    )

    with tmpdir.as_cwd():
        for source in sources:
            command = source + warc_args
            if 'cc' in command:
                # commands mentioning 'cc' get a limit of 2 instead of 1
                command = command.replace(' 1', ' 2')
            print(command, file=sys.stderr)
            main(args=command.split())
            if slow_ci():
                break

        for options in option_sets:
            # option sets are always paired with the first source
            command = ' '.join((sources[0] + warc_args, options))
            print(command, file=sys.stderr)
            main(args=command.split())
            if slow_ci():
                break

        # reaching this point without an exception is the whole test
        assert True
191
228
@@ -195,11 +232,11 @@ def one_ia_corner(tmpdir, cmdline):
195
232
main (args = cmdline .split ())
196
233
197
234
198
- @pytest .mark .skip (reason = 'needs some ratelimit love XXX ' )
235
+ @pytest .mark .skip (reason = 'needs some ratelimit love' )
199
236
def test_warc_ia_corners (tmpdir , caplog ):
200
237
'''
201
238
To test these more properly, need to add a --exact-warcname and then postprocess.
202
- For now, these tests show up in the coverage report
239
+ For now, these are only crash tests.
203
240
'''
204
241
205
242
# revisit vivification
0 commit comments