|
| 1 | +''' |
| 2 | +MIT License |
| 3 | +
|
| 4 | +Copyright (c) 2023 Fast Data Science Ltd (https://fastdatascience.com) |
| 5 | +
|
| 6 | +Maintainer: Thomas Wood |
| 7 | +
|
| 8 | +Tutorial at https://fastdatascience.com/fast-stylometry-python-library/ |
| 9 | +
|
| 10 | +Permission is hereby granted, free of charge, to any person obtaining a copy |
| 11 | +of this software and associated documentation files (the "Software"), to deal |
| 12 | +in the Software without restriction, including without limitation the rights |
| 13 | +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 14 | +copies of the Software, and to permit persons to whom the Software is |
| 15 | +furnished to do so, subject to the following conditions: |
| 16 | +
|
| 17 | +The above copyright notice and this permission notice shall be included in all |
| 18 | +copies or substantial portions of the Software. |
| 19 | +
|
| 20 | +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 21 | +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 22 | +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 23 | +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 24 | +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 25 | +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 26 | +SOFTWARE. |
| 27 | +
|
| 28 | +''' |
| 29 | + |
| 30 | +import os |
| 31 | +import zipfile |
| 32 | + |
| 33 | +import wget |
| 34 | + |
| 35 | + |
def bar_custom(current, total, width=80):
    """
    Display a one-line progress report to track the download.

    The report ends with a carriage return instead of a newline, so every
    call overwrites the report printed by the previous call.

    :param current: Current bytes downloaded.
    :param total: Total bytes. When this is zero or negative (total size not
        known) only the byte count is shown, because no percentage can be
        computed.
    :param width: Width of the bar in chars. Accepted so the function keeps
        the three-argument progress-callback signature; it is not used.
    """
    if total > 0:
        message = "Downloading: %d%% [%d / %d] bytes" % (current / total * 100, current, total)
    else:
        # A zero total would raise ZeroDivisionError and a negative one would
        # yield a meaningless percentage, so report the byte count alone.
        message = "Downloading: %d bytes" % current

    # The line has no trailing newline, so flush explicitly; otherwise a
    # line-buffered stdout may hold the report back until the next newline.
    print(message, end="\r", flush=True)
| 44 | + |
| 45 | + |
def download_examples(data_path="data"):
    """
    Download the example corpus and extract it.

    The zip archive is fetched into ``data_path`` and unpacked there. Nothing
    is downloaded when the ``train`` or ``test`` sub-folder of ``data_path``
    already contains files, so an existing corpus is never overwritten.

    :param data_path: Folder that receives the downloaded zip file and its
        extracted contents. It is created when missing. Defaults to "data".
    """
    if not os.path.exists(data_path):
        print(f"Creating folder {data_path}.")
        # exist_ok covers the case where the folder appears between the
        # existence check above and this call.
        os.makedirs(data_path, exist_ok=True)

    # Leave an already-populated corpus folder alone.
    for subfolder in ("train", "test"):
        corpus_dir = os.path.join(data_path, subfolder)
        if os.path.isdir(corpus_dir) and os.listdir(corpus_dir):
            print(f"{corpus_dir} is not empty. Exiting the downloader.")
            return

    url = 'https://raw.githubusercontent.com/fastdatascience/faststylometry/main/data/train_test.zip'

    local_file = os.path.join(data_path, "train_test.zip")
    print(f"Downloading {url} to {local_file}...")

    # wget.download returns the path it actually saved to. That can differ
    # from the requested path (a numeric suffix is added when the target file
    # already exists), so extract the returned file rather than a stale
    # archive left over from an earlier run.
    local_file = wget.download(url, out=local_file, bar=bar_custom)

    print(f"Downloaded {url} to {local_file}.\nExtracting...")

    with zipfile.ZipFile(local_file, 'r') as zip_ref:
        zip_ref.extractall(data_path)

    print(f"Extracted contents of zip file to {data_path}")
0 commit comments