Skip to content

Commit f6e6711

Browse files
[cfake] add support (#707 #6021 #8549 #8430)
* Create cfake.py * Update __init__.py * Create cfake.py tests * update - simplify & combine code - adjust 'pattern': use '[^/?#]', match lines and groups - generalize example URLs - update default filenames * update docs/supportedsites * update test results --------- Co-authored-by: Mike Fährmann <[email protected]>
1 parent 2578f7b commit f6e6711

File tree

5 files changed

+298
-0
lines changed

5 files changed

+298
-0
lines changed

docs/supportedsites.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,12 @@ Consider all listed sites to potentially be NSFW.
181181
<td>Albums, Files</td>
182182
<td></td>
183183
</tr>
184+
<tr id="cfake" title="cfake">
185+
<td>Celebrity Fakes</td>
186+
<td>https://cfake.com/</td>
187+
<td>Categories, Celebrities, Countries, Created</td>
188+
<td></td>
189+
</tr>
184190
<tr id="naver-chzzk" title="naver-chzzk">
185191
<td>CHZZK</td>
186192
<td>https://chzzk.naver.com/</td>

gallery_dl/extractor/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
"booth",
4141
"bunkr",
4242
"catbox",
43+
"cfake",
4344
"chevereto",
4445
"cien",
4546
"civitai",

gallery_dl/extractor/cfake.py

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# This program is free software; you can redistribute it and/or modify
4+
# it under the terms of the GNU General Public License version 2 as
5+
# published by the Free Software Foundation.
6+
7+
"""Extractors for https://cfake.com/"""
8+
9+
from .common import Extractor, Message
10+
from .. import text
11+
12+
BASE_PATTERN = r"(?:https?://)?(?:www\.)?cfake\.com"
13+
14+
15+
class CfakeExtractor(Extractor):
    """Base class for cfake extractors

    Subclasses supply a 'pattern' with exactly five capture groups,
    which items() unpacks in this order:
    gallery type (the URL path segment after '/images/'), gallery name,
    gallery ID, sub-ID (empty for types without one), and page number
    (optional).
    """
    category = "cfake"
    root = "https://cfake.com"
    directory_fmt = ("{category}", "{type}", "{type_name} ({type_id})")
    filename_fmt = "{category}_{type_name}_{id}.{extension}"
    archive_fmt = "{id}"

    def items(self):
        """Yield one directory message followed by a URL message per image

        Starts at the page number given in the URL (1 if absent) and
        keeps requesting follow-up pages until a page contains no images
        or _check_pagination() finds no link to a next page.
        """
        path_type, type_name, type_id, sub_id, pnum = self.groups

        # Metadata uses the singular form ("categories" -> "category"),
        # but the request URL must keep the path segment exactly as it
        # was matched, so both spellings are tracked separately.
        if path_type.endswith("ies"):
            gallery_type = path_type[:-3] + "y"
        else:
            gallery_type = path_type

        kwdict = self.kwdict
        kwdict["type"] = gallery_type
        kwdict["type_id"] = text.parse_int(type_id)
        kwdict["type_name"] = text.unquote(type_name).replace("_", " ")
        kwdict["sub_id"] = text.parse_int(sub_id)
        kwdict["page"] = pnum = text.parse_int(pnum, 1)
        yield Message.Directory, {}

        base = f"{self.root}/images/{path_type}/{type_name}/{type_id}"
        if sub_id:
            base = f"{base}/{sub_id}"

        while True:
            # the first page has no '/pN' suffix
            page_url = base if pnum < 2 else f"{base}/p{pnum}"
            page = self.request(page_url).text

            # 'num' is a running index across pages;
            # assumes 50 images per full page (hard-coded) - TODO confirm
            offset = (pnum - 1) * 50
            num = 0
            for num, image in enumerate(self._extract_images(page), 1):
                image["num"] = offset + num
                image_url = image["url"]
                yield (Message.Url, image_url,
                       text.nameext_from_url(image_url, image))

            # stop on an empty page or when there is no next-page link
            if not num or not (pnum := self._check_pagination(page)):
                return
            kwdict["page"] = pnum

    def _extract_images(self, page):
        """Yield a metadata dict for each image found in a gallery page

        Each dict has the keys 'url', 'id', 'name', 'date' and 'rating'.
        Items without a 'show=' parameter are skipped.
        """
        for item in text.extract_iter(
                page, '<a href="javascript:showimage(', '</div></div>'):

            # Image path inside the showimage() call, e.g.
            # 'big.php?show=2025/filename.jpg&id_picture=...
            show_param = text.extr(item, "show=", "&")
            if not show_param:
                continue

            picture_id = text.extr(item, "id_picture=", "&")
            name_param = text.extr(item, "p_name=", "'")
            date = text.extr(item, 'id="date_vignette">', '</div>')

            # rating is taken from the 'width:...px' value
            # of the "current-rating" element
            rating_text = text.extr(item, 'class="current-rating"', '</li>')
            rating = text.extr(rating_text, 'width:', 'px')

            yield {
                # show_param is like "2025/filename.jpg"
                "url": f"{self.root}/medias/photos/{show_param}",
                "id": text.parse_int(picture_id) if picture_id else 0,
                "name": text.unescape(name_param) if name_param else "",
                "date": date,
                "rating": rating,
            }

    def _check_pagination(self, page):
        """Return the next page number, or None if there is no next page

        Reads the current page number from the 'num_page_current'
        element and reports current + 1 only if a link ending in
        '/p<next>' appears somewhere in the page.
        """
        # Format: id="num_page_current" ><a href=".../ p1">1</a>
        current_section = text.extr(
            page, 'id="num_page_current"', '</div>')
        if not current_section:
            return None

        # the link text holds the current page number
        current_page_str = text.extr(current_section, '">', '</a>')
        if not current_page_str:
            return None

        current_page = text.parse_int(current_page_str)
        if not current_page:
            return None

        next_page = current_page + 1

        # a next page exists if any href ends in "/pN",
        # terminated by either a quote or a space
        if f'/p{next_page}"' in page or f'/p{next_page} ' in page:
            return next_page

        return None
118+
119+
120+
class CfakeCelebrityExtractor(CfakeExtractor):
    """Extractor for cfake.com celebrity image galleries"""
    subcategory = "celebrity"
    # groups: type, name, ID, empty sub-ID placeholder, optional page
    pattern = (BASE_PATTERN +
               r"/images/(celebrity)/([^/?#]+)/(\d+)()"
               r"(?:/p(\d+))?")
    example = "https://cfake.com/images/celebrity/NAME/123"
126+
127+
128+
class CfakeCategoryExtractor(CfakeExtractor):
    """Extractor for cfake.com category image galleries"""
    subcategory = "category"
    # groups: type, name, ID, empty sub-ID placeholder, optional page
    pattern = (BASE_PATTERN +
               r"/images/(categories)/([^/?#]+)/(\d+)()"
               r"(?:/p(\d+))?")
    example = "https://cfake.com/images/categories/NAME/123"
134+
135+
136+
class CfakeCreatedExtractor(CfakeExtractor):
    """Extractor for cfake.com 'created' image galleries"""
    subcategory = "created"
    # groups: type, name, ID, sub-ID, optional page
    pattern = (BASE_PATTERN +
               r"/images/(created)/([^/?#]+)/(\d+)/(\d+)"
               r"(?:/p(\d+))?")
    example = "https://cfake.com/images/created/NAME/12345/123"
142+
143+
144+
class CfakeCountryExtractor(CfakeExtractor):
    """Extractor for cfake.com country image galleries"""
    subcategory = "country"
    # groups: type, name, ID, sub-ID, optional page
    pattern = (BASE_PATTERN +
               r"/images/(country)/([^/?#]+)/(\d+)/(\d+)"
               r"(?:/p(\d+))?")
    example = "https://cfake.com/images/country/NAME/12345/123"

scripts/supportedsites.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
"batoto" : "BATO.TO",
4141
"bbc" : "BBC",
4242
"booth" : "BOOTH",
43+
"cfake" : "Celebrity Fakes",
4344
"cien" : "Ci-en",
4445
"cohost" : "cohost!",
4546
"comicvine" : "Comic Vine",
@@ -250,6 +251,9 @@
250251
"boosty": {
251252
"feed": "Subscriptions Feed",
252253
},
254+
"cfake": {
255+
"created": "Created",
256+
},
253257
"civitai": {
254258
"models": "Model Listings",
255259
"images": "Image Listings",

test/results/cfake.py

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# This program is free software; you can redistribute it and/or modify
4+
# it under the terms of the GNU General Public License version 2 as
5+
# published by the Free Software Foundation.
6+
7+
from gallery_dl.extractor import cfake
8+
9+
10+
# Expected results for the cfake extractors.
# Each entry names a URL ("#url"), the extractor class expected to handle
# it ("#class") and its category tuple ("#category"). Entries may also
# carry checks on the yielded URLs ("#pattern", "#range", "#count") and
# on metadata values (plain keys).
# NOTE(review): the exact semantics of the "#..." keys are defined by the
# test runner, which is not part of this file - confirm there.
__tests__ = (
# celebrity gallery, first page: URL pattern, count and metadata
{
    "#url" : "https://cfake.com/images/celebrity/Kaley_Cuoco/631/",
    "#category": ("", "cfake", "celebrity"),
    "#class" : cfake.CfakeCelebrityExtractor,
    "#pattern" : r"https://cfake\.com/medias/photos/\d{4}/[0-9a-f]+_cfake\.jpg",
    "#range" : "1-20",
    "#count" : 20,

    "type" : "celebrity",
    "type_id" : 631,
    "type_name" : "Kaley Cuoco",
    "page" : 1,
    "id" : int,
    "num" : int,
    "date" : str,
    "rating" : str,
},

# celebrity gallery entered at an explicit page ('/p2')
{
    "#url" : "https://cfake.com/images/celebrity/Kaley_Cuoco/631/p2",
    "#comment" : "pagination test - page 2",
    "#category": ("", "cfake", "celebrity"),
    "#class" : cfake.CfakeCelebrityExtractor,
    "#pattern" : r"https://cfake\.com/medias/photos/\d{4}/[0-9a-f]+_cfake\.jpg",
    "#range" : "1-5",

    "type" : "celebrity",
    "type_id" : 631,
    "type_name" : "Kaley Cuoco",
    "page" : 2,
},

# 'www.' host variant
{
    "#url" : "https://www.cfake.com/images/celebrity/Chloe_Grace_Moretz/6575/",
    "#category": ("", "cfake", "celebrity"),
    "#class" : cfake.CfakeCelebrityExtractor,
},

# category gallery: path segment is "categories", metadata type "category"
{
    "#url" : "https://cfake.com/images/categories/Facial/25/",
    "#category": ("", "cfake", "category"),
    "#class" : cfake.CfakeCategoryExtractor,
    "#pattern" : r"https://cfake\.com/medias/photos/\d{4}/[0-9a-f]+_cfake\.jpg",
    "#range" : "1-10",
    "#count" : 10,

    "type" : "category",
    "type_id" : 25,
    "type_name" : "Facial",
    "page" : 1,
    "id" : int,
    "num" : int,
},

{
    "#url" : "https://cfake.com/images/categories/Big_Tits/35/",
    "#category": ("", "cfake", "category"),
    "#class" : cfake.CfakeCategoryExtractor,
},

{
    "#url" : "https://cfake.com/images/categories/Big_Tits/35/p2",
    "#comment" : "category pagination test",
    "#category": ("", "cfake", "category"),
    "#class" : cfake.CfakeCategoryExtractor,
},

# 'created' gallery: percent-encoded name and an additional sub-ID
{
    "#url" : "https://cfake.com/images/created/Spice_Girls_%28band%29/72/4",
    "#category": ("", "cfake", "created"),
    "#class" : cfake.CfakeCreatedExtractor,
    "#pattern" : r"https://cfake\.com/medias/photos/\d{4}/[0-9a-f]+_cfake\.jpg",
    "#range" : "1-10",
    "#count" : 10,

    "type" : "created",
    "type_id" : 72,
    "type_name" : "Spice Girls (band)",
    "sub_id" : 4,
    "page" : 1,
    "id" : int,
    "num" : int,
},

{
    "#url" : "https://cfake.com/images/created/Brooklyn_Nine-Nine/4142/4",
    "#category": ("", "cfake", "created"),
    "#class" : cfake.CfakeCreatedExtractor,
},

{
    "#url" : "https://cfake.com/images/created/Brooklyn_Nine-Nine/4142/4/p2",
    "#comment" : "created pagination test",
    "#category": ("", "cfake", "created"),
    "#class" : cfake.CfakeCreatedExtractor,
},

# country gallery: also carries a sub-ID
{
    "#url" : "https://cfake.com/images/country/Australia/12/5",
    "#category": ("", "cfake", "country"),
    "#class" : cfake.CfakeCountryExtractor,
    "#pattern" : r"https://cfake\.com/medias/photos/\d{4}/[0-9a-f]+_cfake\.jpg",
    "#range" : "1-10",
    "#count" : 10,

    "type" : "country",
    "type_id" : 12,
    "type_name" : "Australia",
    "sub_id" : 5,
    "page" : 1,
    "id" : int,
    "num" : int,
},

{
    "#url" : "https://cfake.com/images/country/Mexico/139/5",
    "#category": ("", "cfake", "country"),
    "#class" : cfake.CfakeCountryExtractor,
},

{
    "#url" : "https://cfake.com/images/country/Mexico/139/5/p3",
    "#comment" : "country pagination test",
    "#category": ("", "cfake", "country"),
    "#class" : cfake.CfakeCountryExtractor,
},

)

0 commit comments

Comments
 (0)