Skip to content

Commit d041ff8

Browse files
committed
[tool] Add pixelart dataset downloader
Reference: https://huggingface.co/datasets/jainr3/diffusiondb-pixelart
1 parent 8bc48fc commit d041ff8

File tree

1 file changed

+58
-0
lines changed

1 file changed

+58
-0
lines changed
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
import os
2+
import json
3+
from datasets import load_dataset
4+
5+
6+
def generate_sequential_filename(index: int) -> str:
    """Generate a sequential PNG filename with leading zeros.

    The zero-based *index* is shifted by one so the first file is
    ``00001.png``, the second ``00002.png``, and so on.

    Args:
        index: Zero-based position of the sample.

    Returns:
        The filename: ``index + 1`` zero-padded to at least five digits,
        followed by ``.png``.
    """
    # ``05d`` is a minimum width: numbers above 99999 simply use more digits.
    return f"{index + 1:05d}.png"
9+
10+
11+
def setup_directories(base_dir: str) -> str:
    """Create the ``train`` directory under *base_dir* if it doesn't exist.

    Args:
        base_dir: Directory in which the ``train`` subdirectory is created.

    Returns:
        The path of the ``train`` directory (*base_dir* joined with
        ``"train"``).
    """
    train_dir = os.path.join(base_dir, "train")
    # exist_ok=True lets the script be re-run without failing on an
    # already-present directory.
    os.makedirs(train_dir, exist_ok=True)
    return train_dir
16+
17+
18+
def main() -> None:
    """Export the diffusiondb-pixelart ``2k_all`` subset to a ``train`` folder.

    Creates a ``train`` directory next to this script, saves every sample of
    the dataset's ``train`` split there under a sequential ``NNNNN.png``
    filename, and writes ``train/metadata.jsonl`` with one JSON object per
    line of the form ``{"file_name": ..., "prompt": ...}``.
    """
    # Setup directories (output lives alongside this script file).
    base_dir = os.path.dirname(os.path.abspath(__file__))
    train_dir = setup_directories(base_dir)

    # Load the dataset
    print("Loading diffusiondb-pixelart dataset (2k_all subset)...")
    dataset = load_dataset("jainr3/diffusiondb-pixelart", "2k_all")

    # Get all data
    data = dataset["train"]
    num_samples = len(data)

    # Process all data as train data
    print(f"Processing all {num_samples} samples for training...")

    metadata_path = os.path.join(train_dir, "metadata.jsonl")
    saved_count = 0

    # Each metadata record is written right after its image is saved, so an
    # interrupted run still leaves a metadata file that matches the images
    # already on disk.
    with open(metadata_path, "w", encoding="utf-8") as f:
        for idx, item in enumerate(data):
            prompt = item["text"]
            # NOTE(review): presumably a PIL image exposing ``.save(path)``
            # -- confirm against the dataset's feature schema.
            image = item["image"]

            filename = generate_sequential_filename(idx)
            save_path = os.path.join(train_dir, filename)

            # Save the image
            image.save(save_path)

            record = {"file_name": filename, "prompt": prompt}
            f.write(json.dumps(record) + "\n")
            saved_count += 1

    print("Conversion complete!")
    print(f"Total samples: {saved_count}")
55+
56+
57+
# Run the download/export only when executed as a script, not on import.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)