Skip to content

Commit 374a302

Browse files
author
Anton Hosgood
committed
Initial commit
1 parent ab28bd2 commit 374a302

23 files changed

+986
-0
lines changed
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Lint and formatting gate: runs Ruff against the whole repository on every push.
name: Ruff

on: [ push ]

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python 3.13
        # v5 runs on a supported Node runtime, in line with checkout@v4 above;
        # v3 relies on a runtime GitHub has deprecated.
        uses: actions/setup-python@v5
        with:
          python-version: "3.13"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install ruff
      - name: Check formatting
        run: |
          # Default lint rules, then import ordering (rule set "I"), then formatting.
          ruff check .
          ruff check --select I .
          ruff format --check .

.gitignore

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
11+
build/
12+
develop-eggs/
13+
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
wheels/
23+
share/python-wheels/
24+
*.egg-info/
25+
.installed.cfg
26+
*.egg
27+
MANIFEST
28+
29+
# PyInstaller
30+
# Usually these files are written by a python script from a template
31+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
32+
*.manifest
33+
*.spec
34+
35+
# Installer logs
36+
pip-log.txt
37+
pip-delete-this-directory.txt
38+
39+
# Unit test / coverage reports
40+
htmlcov/
41+
.tox/
42+
.nox/
43+
.coverage
44+
.coverage.*
45+
.cache
46+
nosetests.xml
47+
coverage.xml
48+
*.cover
49+
*.py,cover
50+
.hypothesis/
51+
.pytest_cache/
52+
cover/
53+
54+
# Translations
55+
*.mo
56+
*.pot
57+
58+
# Django stuff:
59+
*.log
60+
local_settings.py
61+
db.sqlite3
62+
db.sqlite3-journal
63+
64+
# Flask stuff:
65+
instance/
66+
.webassets-cache
67+
68+
# Scrapy stuff:
69+
.scrapy
70+
71+
# Sphinx documentation
72+
docs/_build/
73+
74+
# PyBuilder
75+
.pybuilder/
76+
target/
77+
78+
# Jupyter Notebook
79+
.ipynb_checkpoints
80+
81+
# IPython
82+
profile_default/
83+
ipython_config.py
84+
85+
# pyenv
86+
# For a library or package, you might want to ignore these files since the code is
87+
# intended to run in multiple environments; otherwise, check them in:
88+
# .python-version
89+
90+
# pipenv
91+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
93+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
94+
# install all needed dependencies.
95+
#Pipfile.lock
96+
97+
# poetry
98+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99+
# This is especially recommended for binary packages to ensure reproducibility, and is more
100+
# commonly ignored for libraries.
101+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102+
#poetry.lock
103+
104+
# pdm
105+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106+
#pdm.lock
107+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108+
# in version control.
109+
# https://pdm.fming.dev/#use-with-ide
110+
.pdm.toml
111+
112+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113+
__pypackages__/
114+
115+
# Celery stuff
116+
celerybeat-schedule
117+
celerybeat.pid
118+
119+
# SageMath parsed files
120+
*.sage.py
121+
122+
# Environments
123+
.env
124+
.venv
125+
env/
126+
venv/
127+
ENV/
128+
env.bak/
129+
venv.bak/
130+
131+
# Spyder project settings
132+
.spyderproject
133+
.spyproject
134+
135+
# Rope project settings
136+
.ropeproject
137+
138+
# mkdocs documentation
139+
/site
140+
141+
# mypy
142+
.mypy_cache/
143+
.dmypy.json
144+
dmypy.json
145+
146+
# Pyre type checker
147+
.pyre/
148+
149+
# pytype static type analyzer
150+
.pytype/
151+
152+
# Cython debug symbols
153+
cython_debug/
154+
155+
# PyCharm
156+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158+
# and can be added to the global gitignore or merged into this file. For a more nuclear
159+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
160+
.idea/
161+
162+
# Generated datasets
163+
data/
164+
!src/data/
165+
166+
# Model checkpoints
167+
checkpoints/

notebooks/demo.ipynb

Lines changed: 112 additions & 0 deletions
Large diffs are not rendered by default.

requirements.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
requests~=2.32.3
2+
torch~=2.7.0
3+
pillow~=11.2.1
4+
torchvision~=0.22.0
5+
matplotlib~=3.10.3
6+
tqdm~=4.67.1
7+
PyYAML~=6.0.2

scripts/generate_dataset.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import argparse
2+
from pathlib import Path
3+
4+
from src.api.lorem_picsum import LoremPicsum
5+
6+
7+
def main():
    """Download every image listed by the Lorem Picsum API in color and grayscale.

    Positional command-line arguments:
        output: Directory under which the dataset folder is created.
        width: Width of the images to request.
        height: Height of the images to request.

    Files are written to ``<output>/stock_images_<width>x<height>/color`` and
    ``<output>/stock_images_<width>x<height>/grayscale``. Each image is named after
    its position in the ID list (``0.jpg``, ``1.jpg``, ...), not after its API ID.
    """
    cli = argparse.ArgumentParser()
    for arg_name, arg_type in (("output", str), ("width", int), ("height", int)):
        cli.add_argument(arg_name, type=arg_type)

    args = cli.parse_args()
    width, height = args.width, args.height

    dataset_root = Path(args.output) / f"stock_images_{width}x{height}"
    color_image_dir = dataset_root / "color"
    grayscale_image_dir = dataset_root / "grayscale"
    for directory in (color_image_dir, grayscale_image_dir):
        directory.mkdir(parents=True, exist_ok=True)

    api = LoremPicsum(width, height)
    image_ids = LoremPicsum.get_image_ids()

    # Walk the ID list itself rather than a numeric range: some IDs are missing.
    for idx, image_id in enumerate(image_ids):
        file_name = f"{idx}.jpg"

        # Same file name in both folders so color/grayscale pairs line up.
        api.download_image(color_image_dir / file_name, image_id, grayscale=False)
        api.download_image(grayscale_image_dir / file_name, image_id, grayscale=True)

        print(
            f"Image {image_id} successfully downloaded ({idx + 1} / {len(image_ids)})"
        )

    print("All done.")


if __name__ == "__main__":
    main()

src/__init__.py

Whitespace-only changes.

src/api/__init__.py

Whitespace-only changes.

src/api/lorem_picsum.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
import time
2+
from os import PathLike
3+
from typing import AnyStr, List, Optional
4+
5+
import requests
6+
7+
8+
class LoremPicsum:
9+
"""A utility class to interact with the Lorem Picsum API for downloading placeholder images."""
10+
11+
API = "https://picsum.photos"
12+
PAGE_LIMIT = 100
13+
14+
def __init__(self, width: int, height: int) -> None:
15+
"""
16+
Initializes an instance of the LoremPicsum class with specific image dimensions.
17+
18+
Args:
19+
width: The width of images to be downloaded.
20+
height: The height of images to be downloaded.
21+
"""
22+
self.width = width
23+
self.height = height
24+
25+
def download_image(
26+
self,
27+
path: PathLike[AnyStr] | AnyStr,
28+
image_id: Optional[int] = None,
29+
grayscale: bool = False,
30+
blur: Optional[int] = None,
31+
retries: int = 3,
32+
) -> None:
33+
"""
34+
Downloads a JPEG image from the Lorem Picsum API and saves it to a file.
35+
36+
Args:
37+
path: The path the output image will be saved to.
38+
image_id: The ID of the image to download (default is None). If not provided, a random image will be
39+
downloaded.
40+
grayscale: Whether to download the image in grayscale (default is False).
41+
blur: The blur level of the image (default is None). If provided, it should be an integer between 1 and 10
42+
indicating the level of blur.
43+
retries: The number of times to retry the request (default is 3).
44+
45+
Raises:
46+
requests.exceptions.RequestException: If the HTTP request to download the image fails.
47+
"""
48+
url = self.build_url(image_id)
49+
50+
params = {
51+
"grayscale": True if grayscale else None,
52+
"blur": blur if blur else None,
53+
}
54+
55+
for attempt in range(retries + 1):
56+
try:
57+
response = requests.get(url, params=params)
58+
response.raise_for_status()
59+
break
60+
except requests.exceptions.RequestException:
61+
if attempt == retries:
62+
raise
63+
time.sleep(1)
64+
65+
with open(path, "wb") as f:
66+
f.write(response.content)
67+
68+
def build_url(self, image_id: Optional[int] = None) -> str:
69+
"""
70+
Builds the URL for downloading an image from the Lorem Picsum API.
71+
72+
Args:
73+
image_id: The ID of the image to be included in the URL (default is None). If provided, it will specify a
74+
particular image.
75+
76+
Returns:
77+
str: The complete URL for the image download.
78+
"""
79+
url = self.API
80+
81+
if image_id is not None:
82+
url += f"/id/{image_id}"
83+
84+
url += f"/{self.width}/{self.height}"
85+
return url
86+
87+
@staticmethod
88+
def get_image_ids() -> List[int]:
89+
"""
90+
Returns a list of ids of images available in the Lorem Picsum API.
91+
92+
When downloading all images, iterate through the returned image IDs instead of a range as some IDs are missing.
93+
94+
Raises:
95+
requests.exceptions.RequestException: If the HTTP request to retrieve the image list fails.
96+
"""
97+
ids = []
98+
page = 1
99+
url = f"{LoremPicsum.API}/v2/list"
100+
101+
while True:
102+
response = requests.get(
103+
url, params={"page": page, "limit": LoremPicsum.PAGE_LIMIT}
104+
)
105+
response.raise_for_status()
106+
107+
results = response.json()
108+
if len(results) == 0:
109+
return ids
110+
111+
ids += [int(x["id"]) for x in results]
112+
page += 1

src/data/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)