|
| 1 | +import pytest |
| 2 | +from auto_archiver.utils.url import ( |
| 3 | + is_auth_wall, |
| 4 | + check_url_or_raise, |
| 5 | + domain_for_url, |
| 6 | + is_relevant_url, |
| 7 | + remove_get_parameters, |
| 8 | + twitter_best_quality_url, |
| 9 | +) |
| 10 | + |
| 11 | + |
@pytest.mark.parametrize(
    "url, is_auth",
    [
        ("https://example.com", False),
        ("https://t.me/c/abc/123", True),
        ("https://t.me/not-private/", False),
        ("https://instagram.com", True),
        ("https://www.instagram.com", True),
        ("https://www.instagram.com/p/INVALID", True),
        ("https://www.instagram.com/p/C4QgLbrIKXG/", True),
    ],
)
def test_is_auth_wall(url, is_auth):
    """Check that ``is_auth_wall`` reports the expected flag for each URL."""
    detected = is_auth_wall(url)
    assert detected == is_auth
| 26 | + |
| 27 | + |
@pytest.mark.parametrize(
    "url, raises",
    [
        ("http://example.com", False),
        ("https://example.com", False),
        ("ftp://example.com", True),
        ("http://localhost", True),
        ("http://", True),
    ],
)
def test_check_url_or_raise(url, raises):
    """Check that ``check_url_or_raise`` accepts or rejects each URL as expected.

    When ``raises`` is false the call must return a truthy value; otherwise
    it must raise ``ValueError``.
    """
    if not raises:
        # Accepted URLs: the helper is expected to return something truthy.
        assert check_url_or_raise(url)
        return

    with pytest.raises(ValueError):
        check_url_or_raise(url)
| 44 | + |
| 45 | + |
@pytest.mark.parametrize(
    "url, domain",
    [
        ("https://example.com", "example.com"),
        ("https://www.example.com", "www.example.com"),
        ("https://www.example.com/path", "www.example.com"),
        ("https://", ""),
        ("http://localhost", "localhost"),
    ],
)
def test_domain_for_url(url, domain):
    """Check that ``domain_for_url`` returns the expected domain string."""
    extracted = domain_for_url(url)
    assert extracted == domain
| 58 | + |
| 59 | + |
@pytest.mark.parametrize(
    "url, without_get",
    [
        ("https://example.com", "https://example.com"),
        ("https://example.com?utm_source=example", "https://example.com"),
        ("https://example.com?utm_source=example&other=1", "https://example.com"),
        ("https://example.com/something", "https://example.com/something"),
        ("https://example.com/something?utm_source=example", "https://example.com/something"),
    ],
)
def test_remove_get_parameters(url, without_get):
    """Check that ``remove_get_parameters`` returns the URL without its query string."""
    stripped = remove_get_parameters(url)
    assert stripped == without_get
| 72 | + |
| 73 | + |
@pytest.mark.parametrize(
    "url, relevant",
    [
        ("https://example.com", True),
        # Site assets and platform-specific paths expected to be irrelevant.
        ("https://example.com/favicon.ico", False),
        ("https://twimg.com/profile_images", False),
        ("https://twimg.com/something/default_profile_images", False),
        ("https://scontent.cdninstagram.com/username/150x150.jpg", False),
        ("https://static.cdninstagram.com/rsrc.php/", False),
        ("https://telegram.org/img/emoji/", False),
        ("https://www.youtube.com/s/gaming/emoji/", False),
        ("https://yt3.ggpht.com/default-user=", False),
        ("https://www.youtube.com/s/search/audio/", False),
        ("https://ok.ru/res/i/", False),
        ("https://vk.com/emoji/", False),
        ("https://vk.com/images/", False),
        ("https://vk.com/images/reaction/", False),
        ("https://wikipedia.org/static", False),
        # File extensions expected to be irrelevant on any domain.
        ("https://example.com/file.svg", False),
        ("https://example.com/file.ico", False),
        # The same path fragments on an unrelated domain are expected to stay relevant.
        ("https://example.com/file.mp4", True),
        ("https://example.com/150x150.jpg", True),
        ("https://example.com/rsrc.php/", True),
        ("https://example.com/img/emoji/", True),
    ],
)
def test_is_relevant_url(url, relevant):
    """Check that ``is_relevant_url`` classifies each URL as expected."""
    verdict = is_relevant_url(url)
    assert verdict == relevant
| 102 | + |
| 103 | + |
@pytest.mark.parametrize(
    "url, best_quality",
    [
        ("https://twitter.com/some_image.jpg?name=small", "https://twitter.com/some_image.jpg?name=orig"),
        ("https://twitter.com/some_image.jpg", "https://twitter.com/some_image.jpg"),
        ("https://twitter.com/some_image.jpg?name=orig", "https://twitter.com/some_image.jpg?name=orig"),
    ],
)
def test_twitter_best_quality_url(url, best_quality):
    """Check that ``twitter_best_quality_url`` returns the expected URL for each input."""
    upgraded = twitter_best_quality_url(url)
    assert upgraded == best_quality
0 commit comments