Skip to content

Commit 726ae15

Browse files
committed
Merge branch 'image-metadata' fixes #104, #105
2 parents cea7197 + 9527077 commit 726ae15

File tree

3 files changed: 141 additions and 42 deletions (+141 / -42 lines changed)

tests/imageinfo.py

Lines changed: 105 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -1,35 +1,114 @@
11
# -*- coding:utf-8 -*-
22

3-
query = 'https://en.wikipedia.org/w/api.php?action=query&format=json&formatversion=2&iiprop=size|url|timestamp&prop=imageinfo&titles=File%3ADouglas%20adams%20portrait%20cropped.jpg'
3+
query = 'https://en.wikipedia.org/w/api.php?action=query&formatversion=2&iiprop=size|url|timestamp|extmetadata&prop=imageinfo&titles=File%3ADouglas%20adams%20portrait%20cropped.jpg'
44

55
response = r'''
66
{
7-
"continue": {
8-
"iistart": "2010-04-16T22:53:21Z",
9-
"continue": "||"
10-
},
11-
"query": {
12-
"pages": [
13-
{
14-
"ns": 6,
15-
"title": "File:Douglas adams portrait cropped.jpg",
16-
"missing": true,
17-
"known": true,
18-
"imagerepository": "shared",
19-
"imageinfo": [
20-
{
21-
"timestamp": "2010-04-16T22:54:28Z",
22-
"size": 32915,
23-
"width": 333,
24-
"height": 386,
25-
"url": "https://upload.wikimedia.org/wikipedia/commons/c/c0/Douglas_adams_portrait_cropped.jpg",
26-
"descriptionurl": "https://commons.wikimedia.org/wiki/File:Douglas_adams_portrait_cropped.jpg",
27-
"descriptionshorturl": "https://commons.wikimedia.org/w/index.php?curid=10031710"
28-
}
7+
"continue": {
8+
"iistart": "2010-04-16T22:53:21Z",
9+
"continue": "||"
10+
},
11+
"query": {
12+
"pages": [
13+
{
14+
"ns": 6,
15+
"title": "File:Douglas adams portrait cropped.jpg",
16+
"missing": true,
17+
"known": true,
18+
"imagerepository": "shared",
19+
"imageinfo": [
20+
{
21+
"timestamp": "2010-04-16T22:54:28Z",
22+
"size": 32915,
23+
"width": 333,
24+
"height": 386,
25+
"url": "https://upload.wikimedia.org/wikipedia/commons/c/c0/Douglas_adams_portrait_cropped.jpg",
26+
"descriptionurl": "https://commons.wikimedia.org/wiki/File:Douglas_adams_portrait_cropped.jpg",
27+
"descriptionshorturl": "https://commons.wikimedia.org/w/index.php?curid=10031710",
28+
"extmetadata": {
29+
"DateTime": {
30+
"value": "2010-04-16 22:54:28",
31+
"source": "mediawiki-metadata",
32+
"hidden": ""
33+
},
34+
"ObjectName": {
35+
"value": "Douglas adams portrait cropped",
36+
"source": "mediawiki-metadata",
37+
"hidden": ""
38+
},
39+
"CommonsMetadataExtension": {
40+
"value": 1.2,
41+
"source": "extension",
42+
"hidden": ""
43+
},
44+
"Categories": {
45+
"value": "Douglas Adams|Portrait photographs of men|Self-published work|Uploaded with derivativeFX",
46+
"source": "commons-categories",
47+
"hidden": ""
48+
},
49+
"Assessments": {
50+
"value": "",
51+
"source": "commons-categories",
52+
"hidden": ""
53+
},
54+
"ImageDescription": {
55+
"value": "douglas adams inspired \"Hitch hikers guide to the galaxy\" H2G2 <a rel=\"nofollow\" class=\"external text\" href=\"http://www.hughes-photography.eu\">www.hughes-photography.eu</a>",
56+
"source": "commons-desc-page"
57+
},
58+
"DateTimeOriginal": {
59+
"value": "",
60+
"source": "commons-desc-page"
61+
},
62+
"Credit": {
63+
"value": "<ul>\n<li><a href=\"//commons.wikimedia.org/wiki/File:Douglas_adams_portrait.jpg\" title=\"File:Douglas adams portrait.jpg\">Douglas_adams_portrait.jpg</a></li>\n</ul>",
64+
"source": "commons-desc-page",
65+
"hidden": ""
66+
},
67+
"Artist": {
68+
"value": "<ul>\n<li>\n<a href=\"//commons.wikimedia.org/wiki/File:Douglas_adams_portrait.jpg\" title=\"File:Douglas adams portrait.jpg\">Douglas_adams_portrait.jpg</a>: <a rel=\"nofollow\" class=\"external text\" href=\"https://www.flickr.com/people/79664273@N00\">michael hughes</a> from berlin, germany</li>\n<li>derivative work: <a href=\"//commons.wikimedia.org/wiki/User:Beao\" title=\"User:Beao\">Bea</a><b><a href=\"//commons.wikimedia.org/wiki/User_talk:Beao\" title=\"User talk:Beao\">o</a></b>\n</li>\n</ul>",
69+
"source": "commons-desc-page"
70+
},
71+
"LicenseShortName": {
72+
"value": "CC BY-SA 2.0",
73+
"source": "commons-desc-page",
74+
"hidden": ""
75+
},
76+
"UsageTerms": {
77+
"value": "Creative Commons Attribution-Share Alike 2.0",
78+
"source": "commons-desc-page",
79+
"hidden": ""
80+
},
81+
"AttributionRequired": {
82+
"value": "true",
83+
"source": "commons-desc-page",
84+
"hidden": ""
85+
},
86+
"LicenseUrl": {
87+
"value": "https://creativecommons.org/licenses/by-sa/2.0",
88+
"source": "commons-desc-page",
89+
"hidden": ""
90+
},
91+
"Copyrighted": {
92+
"value": "True",
93+
"source": "commons-desc-page",
94+
"hidden": ""
95+
},
96+
"Restrictions": {
97+
"value": "",
98+
"source": "commons-desc-page",
99+
"hidden": ""
100+
},
101+
"License": {
102+
"value": "cc-by-sa-2.0",
103+
"source": "commons-templates",
104+
"hidden": ""
105+
}
106+
}
107+
}
108+
]
109+
}
29110
]
30-
}
31-
]
32-
}
111+
}
33112
}
34113
'''
35114

wptools/page.py

Lines changed: 35 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -76,15 +76,18 @@ def __init__(self, *args, **kwargs):
7676
else:
7777
self.show()
7878

79-
def __update_imagedata(self, title, _from, info):
79+
def __insert_image_info(self, title, _from, info):
8080
"""
81-
Update page images with get_imageinfo() data
81+
Insert API image INFO into matching image dict
8282
83-
We make one imageinfo request containing only unique
84-
image['file'] names. We match the API response data to an
85-
image['file'] by API title/file match or API
86-
normalized["from"]/file match. So, some imageinfo data will be
87-
applied to more than one image['kind'].
83+
We make one imageinfo request containing only unique image
84+
filenames. We reduce duplication by asking for image data per
85+
file, instead of per "kind" or source (Wikipedia, Wikidata,
86+
etc.), because some sources reference the same image file. We
87+
match API imageinfo response data to existing image filenames
88+
by API title or normalized "from" title. So, some imageinfo
89+
data will be applied to more than one image "kind" (source) if
90+
they share the same filename.
8891
"""
8992
for img in self.data['image']:
9093
if 'url' not in img:
@@ -93,6 +96,28 @@ def __update_imagedata(self, title, _from, info):
9396
elif _from == img['file']: # matching from/file
9497
img.update(info)
9598

99+
def __pull_image_info(self, title, imageinfo, normalized):
100+
"""
101+
Pull image INFO from API response and insert
102+
"""
103+
for info in imageinfo:
104+
info.update({'title': title})
105+
106+
# get API normalized "from" filename for matching
107+
_from = None
108+
for norm in normalized:
109+
if title == norm['to']:
110+
_from = norm['from']
111+
112+
# let's put all "metadata" in one member
113+
info['metadata'] = {}
114+
extmetadata = info.get('extmetadata')
115+
if extmetadata:
116+
info['metadata'].update(extmetadata)
117+
del info['extmetadata']
118+
119+
self.__insert_image_info(title, _from, info)
120+
96121
def _missing_imageinfo(self):
97122
"""
98123
returns list of image filenames that are missing info
@@ -189,14 +214,9 @@ def _set_imageinfo_data(self):
189214

190215
for page in pages:
191216
title = page.get('title')
192-
if page.get('imageinfo'):
193-
for info in page['imageinfo']:
194-
info.update({'title': title})
195-
_from = None # normalized filename
196-
for norm in normalized:
197-
if title == norm['to']:
198-
_from = norm['from']
199-
self.__update_imagedata(title, _from, info)
217+
imageinfo = page.get('imageinfo')
218+
if imageinfo:
219+
self.__pull_image_info(title, imageinfo, normalized)
200220

201221
# Mark missing imageinfo to prevent duplicate requests
202222
for img in self.data['image']:

wptools/query.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ class WPToolsQuery(object):
3737
"${WIKI}/w/api.php?action=query"
3838
"&format=json"
3939
"&formatversion=2"
40-
"&iiprop=size|url|timestamp"
40+
"&iiprop=size|url|timestamp|extmetadata"
4141
"&prop=imageinfo"
4242
"&titles=${FILES}"))
4343

0 commit comments

Comments
 (0)