Skip to content

Commit 0d3f2da

Browse files
committed
New release
- python3 required - updated documentation - supply a User-Agent when making requests - command line utility is now microdata instead of microdata.py
1 parent cfda6fe commit 0d3f2da

File tree

5 files changed

+162
-44
lines changed

5 files changed

+162
-44
lines changed

LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
The MIT License (MIT)
2+
3+
Copyright (c) Ed Summers
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

README.md

Lines changed: 112 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,24 +3,124 @@ microdata
33

44
[![Build Status](https://secure.travis-ci.org/edsu/microdata.png)](http://travis-ci.org/edsu/microdata)
55

6-
microdata.py is a small utility library for extracting
7-
[HTML5 Microdata](http://dev.w3.org/html5/md/) from
8-
HTML. It depends on
9-
[html5lib](http://code.google.com/p/html5lib/)
10-
to do the heavy lifting of building the DOM.
11-
For more about HTML5 Microdata check out Mark Pilgrim's
12-
[chapter](http://diveintohtml5.org/extensibility.html) on on it in
13-
[Dive Into HTML5](http://diveintohtml5.org/).
6+
microdata.py is a small utility library for extracting [HTML5
7+
Microdata](http://dev.w3.org/html5/md/) from HTML. It depends on
8+
[html5lib](http://code.google.com/p/html5lib/) to do the heavy lifting of
9+
building the DOM. For more about HTML5 Microdata check out Mark Pilgrim's
10+
[chapter](http://diveintohtml5.org/extensibility.html) on it in [Dive Into
11+
HTML5](http://diveintohtml5.org/).
1412

1513
Command Line
1614
------------
1715

18-
When you install microdata.py via pip it will be made available on the command
19-
line too:
16+
When you install microdata via pip it will also install a command line utility:
2017

21-
% microdata.py http://www.wdl.org/en/item/1/
18+
```
19+
$ microdata https://www.youtube.com/watch?v=dQw4w9WgXcQ
20+
https://www.youtube.com/watch?v=dQw4w9WgXcQ
21+
{
22+
"items": [
23+
{
24+
"type": [
25+
"http://schema.org/VideoObject"
26+
],
27+
"properties": {
28+
"url": [
29+
"https://www.youtube.com/watch?v=dQw4w9WgXcQ"
30+
],
31+
"name": [
32+
"Rick Astley - Never Gonna Give You Up (Official Music Video)"
33+
],
34+
"description": [
35+
"The official video for \u201cNever Gonna Give You Up\u201d by Rick Astley \u201cNever Gonna Give You Up\u201d was a global smash on its release in July 1987, topping the charts ..."
36+
],
37+
"paid": [
38+
"False"
39+
],
40+
"channelId": [
41+
"UCuAXFkgsw1L7xaCfnd5JJOw"
42+
],
43+
"videoId": [
44+
"dQw4w9WgXcQ"
45+
],
46+
"duration": [
47+
"PT3M33S"
48+
],
49+
"unlisted": [
50+
"False"
51+
],
52+
"author": [
53+
{
54+
"type": [
55+
"http://schema.org/Person"
56+
],
57+
"properties": {
58+
"url": [
59+
"http://www.youtube.com/channel/UCuAXFkgsw1L7xaCfnd5JJOw"
60+
],
61+
"name": [
62+
""
63+
]
64+
}
65+
}
66+
],
67+
"thumbnailUrl": [
68+
"https://i.ytimg.com/vi/dQw4w9WgXcQ/maxresdefault.jpg"
69+
],
70+
"thumbnail": [
71+
{
72+
"type": [
73+
"http://schema.org/ImageObject"
74+
],
75+
"properties": {
76+
"url": [
77+
"https://i.ytimg.com/vi/dQw4w9WgXcQ/maxresdefault.jpg"
78+
],
79+
"width": [
80+
"1280"
81+
],
82+
"height": [
83+
"720"
84+
]
85+
}
86+
}
87+
],
88+
"embedUrl": [
89+
"https://www.youtube.com/embed/dQw4w9WgXcQ"
90+
],
91+
"playerType": [
92+
"HTML5 Flash"
93+
],
94+
"width": [
95+
"1280"
96+
],
97+
"height": [
98+
"720"
99+
],
100+
"isFamilyFriendly": [
101+
"true"
102+
],
103+
"regionsAllowed": [
104+
"AD,AE,AF,AG,AI,AL,AM,AO,AQ,AR,AS,AT,AU,AW,AX,AZ,BA,BB,BD,BE,BF,BG,BH,BI,BJ,BL,BM,BN,BO,BQ,BR,BS,BT,BV,BW,BY,BZ,CA,CC,CD,CF,CG,CH,CI,CK,CL,CM,CN,CO,CR,CU,CV,CW,CX,CY,CZ,DE,DJ,DK,DM,DO,DZ,EC,EE,EG,EH,ER,ES,ET,FI,FJ,FK,FM,FO,FR,GA,GB,GD,GE,GF,GG,GH,GI,GL,GM,GN,GP,GQ,GR,GS,GT,GU,GW,GY,HK,HM,HN,HR,HT,HU,ID,IE,IL,IM,IN,IO,IQ,IR,IS,IT,JE,JM,JO,JP,KE,KG,KH,KI,KM,KN,KP,KR,KW,KY,KZ,LA,LB,LC,LI,LK,LR,LS,LT,LU,LV,LY,MA,MC,MD,ME,MF,MG,MH,MK,ML,MM,MN,MO,MP,MQ,MR,MS,MT,MU,MV,MW,MX,MY,MZ,NA,NC,NE,NF,NG,NI,NL,NO,NP,NR,NU,NZ,OM,PA,PE,PF,PG,PH,PK,PL,PM,PN,PR,PS,PT,PW,PY,QA,RE,RO,RS,RU,RW,SA,SB,SC,SD,SE,SG,SH,SI,SJ,SK,SL,SM,SN,SO,SR,SS,ST,SV,SX,SY,SZ,TC,TD,TF,TG,TH,TJ,TK,TL,TM,TN,TO,TR,TT,TV,TW,TZ,UA,UG,UM,US,UY,UZ,VA,VC,VE,VG,VI,VN,VU,WF,WS,YE,YT,ZA,ZM,ZW"
105+
],
106+
"interactionCount": [
107+
"1141688870"
108+
],
109+
"datePublished": [
110+
"2009-10-24"
111+
],
112+
"uploadDate": [
113+
"2009-10-24"
114+
],
115+
"genre": [
116+
"Music"
117+
]
118+
}
119+
}
120+
]
121+
}
122+
```
22123

23-
This will print out the JSON for items extracted from the supplied URL.
24124

25125
Library
26126
-------

microdata.py

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,32 @@
11
#!/usr/bin/env python
22

33
import sys
4+
import json
45
import html5lib
56

67
from collections import defaultdict
8+
from urllib.request import urlopen, Request
79

10+
USER_AGENT = "microdata.py <https://github.com/edsu/microdata>"
811

9-
try:
10-
import json
11-
except ImportError:
12-
import simplejson as json
1312

13+
def main():
14+
15+
if len(sys.argv) < 2:
16+
print("Usage: microdata <URL>")
17+
sys.exit(1)
18+
19+
for url in sys.argv[1:]:
20+
sys.stderr.write(url + "\n")
21+
22+
microdata = {}
23+
microdata['items'] = items = []
24+
25+
req = Request(url, headers={"User-Agent": USER_AGENT})
26+
for item in get_items(urlopen(req)):
27+
items.append(item.json_dict())
28+
29+
print(json.dumps(microdata, indent=2))
1430

1531
def get_items(location, encoding=None):
1632
"""
@@ -230,22 +246,6 @@ def _make_item(e):
230246

231247

232248
if __name__ == "__main__":
233-
try:
234-
from urllib.request import urlopen
235-
except ImportError:
236-
from urllib import urlopen
237-
238-
if len(sys.argv) < 2:
239-
print("Usage: %s URL [...]" % sys.argv[0])
240-
sys.exit(1)
241-
242-
for url in sys.argv[1:]:
243-
sys.stderr.write(url + "\n")
244-
245-
microdata = {}
246-
microdata['items'] = items = []
249+
main()
247250

248-
for item in get_items(urlopen(url)):
249-
items.append(item.json_dict())
250251

251-
print(json.dumps(microdata, indent=2))

setup.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,19 @@
11
from setuptools import setup
2-
import sys
3-
4-
extra = {}
52

63
setup(
74
name = 'microdata',
8-
version = '0.7.1',
5+
version = '0.8.0',
96
description = "html5lib extension for parsing microdata",
107
author = "Ed Summers",
118
author_email = "[email protected]",
129
url = "http://github.com/edsu/microdata",
10+
python_requires=">=3.3",
1311
py_modules = ['microdata'],
14-
scripts = ['microdata.py'],
1512
test_suite = 'test',
1613
install_requires = ['html5lib>=0.999999999'],
17-
**extra
14+
entry_points = {
15+
"console_scripts": [
16+
"microdata = microdata:main"
17+
]
18+
}
1819
)

test.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,4 @@
1-
try:
2-
import json
3-
except ImportError:
4-
import simplejson as json
5-
1+
import json
62
import unittest
73

84
from microdata import get_items, Item, URI

0 commit comments

Comments
 (0)