Skip to content

Commit 3730d3a

Browse files
committed
demozoo: last fixes and improvements to the scraper
1 parent 7c9dca0 commit 3730d3a

File tree

4 files changed

+184
-65
lines changed

4 files changed

+184
-65
lines changed

scrapers/py_importers/demozoo.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,8 @@ def scrape_page(slug, url, platform):
283283
video,
284284
date=release_date,
285285
repository=source,
286-
url=demozoo_url,
286+
url=url,
287+
url2=demozoo_url,
287288
)
288289

289290

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
import os
2+
import hashlib
3+
import json
4+
import argparse
5+
6+
"""
7+
Run like this
8+
python dupe-check-against-dict.py /home/avivace/<USER>/database/scrapers/py_importers/py_common/beta /home/<USER>/gbdev/database/scripts/hashes.json
9+
10+
Before running it, generate the 'hashes.json' file by running scripts/dupe-finder.py once.
11+
12+
"""
13+
14+
15+
def get_file_hash(filename, alg="md5", chunksize=131072):
    """Return the hexadecimal digest of a file's contents.

    The file is opened in unbuffered binary mode and consumed in pieces of
    ``chunksize`` bytes, so the whole file is never held in memory at once.

    Args:
        filename: Path of the file to hash.
        alg: Name of the hash algorithm: ``"md5"`` (default), ``"sha1"`` or
            ``"sha256"``.
        chunksize: Number of bytes requested per read call.

    Returns:
        The hexadecimal digest string of the file contents.

    Raises:
        ValueError: If ``alg`` is not one of the supported algorithm names.
        OSError: If the file cannot be opened or read.
    """
    constructors = {
        "sha256": hashlib.sha256,
        "sha1": hashlib.sha1,
        "md5": hashlib.md5,
    }
    constructor = constructors.get(alg)
    if constructor is None:
        # Fail early with a clear message instead of hitting an unbound
        # local variable further down.
        supported = ", ".join(sorted(constructors))
        raise ValueError(
            f"Unsupported hash algorithm {alg!r}; expected one of: {supported}"
        )
    h = constructor()

    with open(filename, "rb", buffering=0) as f:
        # iter() with a sentinel keeps calling read() until it returns b"" (EOF).
        for b in iter(lambda: f.read(chunksize), b""):
            h.update(b)
    return h.hexdigest()
27+
28+
29+
def find_rom_files(folder):
    """Find all .gb and .gbc files in the given folder and its subfolders.

    The extension check is case-insensitive, so files such as ``GAME.GB`` or
    ``demo.GBC`` are picked up as well.

    Args:
        folder: Root directory to walk recursively.

    Returns:
        A list of paths, each built by joining the directory reported by
        ``os.walk`` with the matching file name. The list is empty when
        nothing matches.
    """
    rom_extensions = (".gb", ".gbc")
    rom_files = []
    for root, _, files in os.walk(folder):
        rom_files.extend(
            os.path.join(root, file)
            for file in files
            # str.endswith accepts a tuple, so one call covers both extensions.
            if file.lower().endswith(rom_extensions)
        )
    return rom_files
37+
38+
39+
def check_md5_against_json(rom_files, json_path):
    """Check the MD5 of each ROM file against the keys in the JSON file.

    The JSON file is loaded once. For every ROM whose MD5 digest appears as a
    key, a ``[FOUND]`` line is printed with the ROM path and the value stored
    under that key. ROMs with an unknown digest produce no output.

    Args:
        rom_files: Iterable of paths to the ROM files to check.
        json_path: Path to the JSON file; its top-level value is expected to
            be an object keyed by MD5 hex digest.

    Returns:
        None. Results are reported on standard output only.
    """
    # JSON text is UTF-8 by specification; do not depend on the locale default.
    with open(json_path, "r", encoding="utf-8") as f:
        md5_dict = json.load(f)

    for rom_file in rom_files:
        md5_checksum = get_file_hash(rom_file)
        # Membership is tested on the dict itself: a hash lookup instead of a
        # linear scan over a copied list of keys.
        if md5_checksum in md5_dict:
            print(
                f"[FOUND] {rom_file} has a known MD5 checksum: {md5_dict[md5_checksum]}"
            )
52+
53+
54+
if __name__ == "__main__":
    # Command-line entry point: two positional arguments, the folder to scan
    # and the JSON file holding the known checksums.
    cli = argparse.ArgumentParser(
        description="Check MD5 checksums of ROM files against a JSON file."
    )
    positional_arguments = (
        ("folder_path", "Path to folder A containing subfolders with ROM files."),
        ("json_file_path", "Path to the JSON file containing MD5 checksums."),
    )
    for argument_name, argument_help in positional_arguments:
        cli.add_argument(argument_name, type=str, help=argument_help)

    options = cli.parse_args()

    # Collect the candidate ROMs first; the JSON file is only needed if any exist.
    discovered_roms = find_rom_files(options.folder_path)

    if discovered_roms:
        # Compare each ROM's MD5 against the checksums in the JSON file.
        check_md5_against_json(discovered_roms, options.json_file_path)
    else:
        print("No .gb or .gbc files found.")

scrapers/py_importers/py_common/Production.py

+25-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,28 @@
11
class Production:
2-
def __init__(self, title, slug, developer, platform, typetag, screenshots, files,
3-
lic="", assetLicense="", description="", video="", date="", tags=[], alias="", repository="", gameWebsite="", devWebsite="", onlineplay="",
4-
wip="", url=""):
2+
def __init__(
3+
self,
4+
title,
5+
slug,
6+
developer,
7+
platform,
8+
typetag,
9+
screenshots,
10+
files,
11+
lic="",
12+
assetLicense="",
13+
description="",
14+
video="",
15+
date="",
16+
tags=[],
17+
alias="",
18+
repository="",
19+
gameWebsite="",
20+
devWebsite="",
21+
onlineplay="",
22+
wip="",
23+
url="",
24+
url2="",
25+
):
526
# mandatory fields
627
self.title = title
728
self.slug = slug
@@ -26,3 +47,4 @@ def __init__(self, title, slug, developer, platform, typetag, screenshots, files
2647
self.wip = wip if wip else ""
2748

2849
self.url = url
50+
self.url2 = url2

0 commit comments

Comments
 (0)