|
| 1 | +import os.path |
| 2 | + |
1 | 3 | import pytest
|
2 | 4 | from bs4 import BeautifulSoup
|
3 | 5 |
|
4 |
| -from xklb.utils.web import extract_nearby_text, safe_unquote |
| 6 | +from xklb.utils.web import extract_nearby_text, safe_unquote, url_to_local_path |
| 7 | + |
| 8 | + |
def test_url_to_local_path():
    """Check the local path derived from a URL alone.

    Each case pairs a URL with the relative path expected back from
    ``url_to_local_path``. Both sides are passed through
    ``os.path.normpath`` before comparison.
    """
    tests = [
        # Plain paths: scheme is dropped, host becomes the top directory.
        ("http://example.com/path/to/resource.html", "example.com/path/to/resource.html"),
        ("https://another-example.com/a/b/c/d/e/f/g.txt", "another-example.com/a/b/c/d/e/f/g.txt"),
        # Percent-encoded characters are decoded (%20 -> space).
        ("http://example.com/space%20in%20path/to/resource.html", "example.com/space in path/to/resource.html"),
        (
            # %21%40%23 decodes to "!@#".
            "https://another-example.com/path/to/special%20characters%21%40%23.txt",
            "another-example.com/path/to/special characters!@#.txt",
        ),
        (
            # Encoded slashes (%2F) end up as ordinary path separators.
            "http://example.com/interesting%2Fpath%2Fwith%2Fslashes/resource.txt",
            "example.com/interesting/path/with/slashes/resource.txt",
        ),
        (
            # ".." segments, encoded or not, are expected to come back as "_".
            "http://example.com/interesting%2F..%2F..%2F..%2F../../path/resource.txt",
            "example.com/interesting/_/_/_/_/_/path/resource.txt",
        ),
    ]

    for url, expected in tests:
        result = url_to_local_path(url)
        assert os.path.normpath(result) == os.path.normpath(expected), f"Failed for URL: {url}"
| 31 | + |
| 32 | + |
class MockResponse:
    """Minimal stand-in for an HTTP response exposing only ``headers``."""

    def __init__(self, headers):
        # Stored as given; the tests in this file pass plain dicts.
        self.headers = headers
| 36 | + |
| 37 | + |
@pytest.mark.parametrize(
    "url, output_path, output_prefix, response_headers, expected",
    [
        # Content-Disposition header provides the filename
        (
            "http://example.com/path/to/resource",
            None,
            None,
            {"Content-Disposition": 'attachment; filename="downloaded_file.txt"'},
            "example.com/path/to/downloaded_file.txt",
        ),
        # Trailing slash: the last URL segment is kept as a directory
        (
            "http://example.com/path/to/resource/",
            None,
            None,
            {"Content-Disposition": 'attachment; filename="downloaded_file.txt"'},
            "example.com/path/to/resource/downloaded_file.txt",
        ),
        # No Content-Disposition, filename derived from URL
        ("http://example.com/path/to/resource.html", None, None, {}, "example.com/path/to/resource.html"),
        # output_path provided, other parameters ignored except for output prefix
        ("http://example.com/t/test.txt", "custom/path/custom_file.txt", None, {}, "custom/path/custom_file.txt"),
        ("http://example.com/t/test.txt", "custom/path/custom_file.txt", "", {}, "custom/path/custom_file.txt"),
        # Absolute output_path: the prefix is expected to have no effect
        (
            "http://example.com/t/test.txt",
            "/custom/path/custom_file.txt",
            "dir/dir2/",
            {},
            "/custom/path/custom_file.txt",
        ),
        # Relative output_path: the prefix is expected in front of it
        (
            "http://example.com/t/test.txt",
            "custom/path/custom_file.txt",
            "dir/dir2/",
            {},
            "dir/dir2/custom/path/custom_file.txt",
        ),
        # output_prefix provided, prepended to generated output path
        ("http://example.com/some/resource", None, "/prefix/path", {}, "/prefix/path/example.com/some/resource"),
        # Illegal characters in filename from Content-Disposition are replaced
        (
            "http://example.com/test/",
            None,
            None,
            {"Content-Disposition": 'attachment; filename="../../me.txt"'},
            "example.com/test/_/_/me.txt",
        ),
        # A slash inside the header filename is expected to stay a path separator
        (
            "http://example.com",
            None,
            None,
            {"Content-Disposition": 'attachment; filename="na/me.txt"'},
            "example.com/na/me.txt",
        ),
        # Header present but without a usable filename: fall back to the URL
        (
            "http://example.com/no-name.txt",
            None,
            None,
            {"Content-Disposition": "attachment"},
            "example.com/no-name.txt",
        ),
        (
            "http://example.com/no-name.txt",
            None,
            None,
            {"Content-Disposition": 'attachment; filename=""'},
            "example.com/no-name.txt",
        ),
        # Non-ASCII filename supplied via both filename and filename*
        # NOTE(review): the two adjacent literals concatenate to
        # "filename*=UTF-8%E4%BD%A0..." with no '' separator between the
        # charset and the encoded value -- confirm this is intended.
        (
            "http://example.com/test/",
            None,
            None,
            {
                "Content-Disposition": 'Content-Disposition: form-data; name="file"; filename="你好.xlsx"; filename*=UTF-8'
                "%E4%BD%A0%E5%A5%BD.xlsx"
            },
            "example.com/test/你好.xlsx",
        ),
    ],
)
def test_url_to_local_path_with_response(url, output_path, output_prefix, response_headers, expected):
    """Check the local path when a response and optional overrides are given.

    A ``MockResponse`` carrying ``response_headers`` is passed together with
    ``output_path`` and ``output_prefix``; the returned path must equal
    ``expected`` exactly.
    """
    response = MockResponse(response_headers)
    result = url_to_local_path(url, response, output_path, output_prefix)
    assert result == expected, f"Failed for URL: {url}"
5 | 122 |
|
6 | 123 |
|
7 | 124 | @pytest.mark.parametrize(
|
|
0 commit comments