Skip to content

Commit 8dd40dd

Browse files
authored
Merge pull request #36 from bee-san/improve-regex
Regex for LTC, BCH, XRP, XMR crypto wallets, IPv6 and more
2 parents 0c3172a + ea1b79d commit 8dd40dd

File tree

6 files changed

+251
-47
lines changed

6 files changed

+251
-47
lines changed

fixtures/file

+7-1
Original file line number | Diff line number | Diff line change
@@ -30,4 +30,10 @@ ScOAntcCa78
3030
127.0.0.1
3131
github@skerritt.blog
3232

33-
Access-Control-Allow-Headers: *
33+
Access-Control-Allow-Headers: *
34+
35+
47DF8D9NwtmefhFUghynYRMqrexiZTsm48T1hhi2jZcbfcwoPbkhMrrED6zqJRfeYpXFfdaqAT3jnBEwoMwCx6BYDJ1W3ub
36+
LRX8rSPVjifTxoLeoJtLf2JYdJFTQFcE7m
37+
bitcoincash:qzlg6uvceehgzgtz6phmvy8gtdqyt6vf359at4n3lq
38+
rBPAQmwMrt7FDDPNyjwFgwSqbWZPf6SLkk
39+
2001:0db8:85a3:0000:0000:8a2e:0370:7334

pywhat/Data/regex.json

+88-32
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
[
22
{
3-
"Name": "Ethereum Wallet Address",
4-
"Regex": "^0x[a-fA-F0-9]{40}$",
3+
"Name": "Ethereum (ETH) Wallet Address",
4+
"Regex": "(?i)^0x[a-f0-9]{40}$",
55
"Description": null,
66
"URL": "https://etherscan.io/address/",
77
"Rarity": 1,
@@ -12,7 +12,7 @@
1212
},
1313
{
1414
"Name": "Access-Control-Allow-Header",
15-
"Regex": "Access-Control-Allow: [a-zA-Z0-9\\-*]",
15+
"Regex": "(?i)Access-Control-Allow: [a-z0-9\\-*]",
1616
"Description": "Used for [#CAE4F1][link=https://en.wikipedia.org/wiki/Cross-origin_resource_sharing]Cross-Origin Resource Sharing (CORS)[/link][/#CAE4F1]",
1717
"Rarity": 1,
1818
"Tags": [
@@ -21,7 +21,7 @@
2121
]
2222
},
2323
{
24-
"Name": "Bitcoin Wallet",
24+
"Name": "Bitcoin (₿) Wallet Address",
2525
"Regex": "^[13][a-km-zA-HJ-NP-Z1-9]{25,34}$",
2626
"Description": null,
2727
"URL": "https://www.blockchain.com/btc/address/",
@@ -68,27 +68,60 @@
6868
]
6969
},
7070
{
71-
"Name": "Uniform Resource Locator (URL)",
72-
"Regex": "(https?:\\/\\/(?:www\\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\\.[^\\s]{2,}|www\\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\\.[^\\s]{2,}|https?:\\/\\/(?:www\\.|(?!www))[a-zA-Z0-9]+\\.[^\\s]{2,}|www\\.[a-zA-Z0-9]+\\.[^\\s]{2,})",
71+
"Name": "Bitcoin Cash (BCH) Wallet Address",
72+
"Regex": "(?i)^bitcoincash:[a-z0-9]{42}$",
73+
"Description": null,
74+
"URL": "https://www.blockchain.com/bch/address/",
75+
"Rarity": 1,
76+
"Tags": [
77+
"Cryptocurrency",
78+
"Finance"
79+
]
80+
},
81+
{
82+
"Name": "Internet Protocol (IP) Address Version 6",
83+
"Regex": "\\[?(?:(?:[0-9a-f]{1,4}:){7,7}[0-9a-f]{1,4}|([0-9a-f]{4}:){1,7}:|([0-9a-f]{1,4}:){1,6}:[0-9a-f]{1,4}|([0-9a-f]{1,4}:){1,5}(:[0-9a-f]{1,4}){1,2}|([0-9a-f]{1,4}:){1,4}(:[0-9a-f]{1,4}){1,3}|([0-9a-f]{1,4}:){1,3}(:[0-9a-f]{1,4}){1,4}|([0-9a-fA]{1,4}:){1,2}(:[0-9a-f]{1,4}){1,5}|[0-9a-f]{1,4}:((:[0-9a-f]{1,4}){1,6})|:((:[0-9a-f]{1,4}){1,7}|:)|fe80:(:[0-9a-f]{0,4}){0,4}%[0-9a-z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|(?:[0-9a-f]{1,4}:){1,4}:(?:(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\\]?(?::[0-9]{1,5})?",
7384
"Description": null,
85+
"URL": "https://www.shodan.io/host/",
7486
"Rarity": 1,
87+
"Tags": [
88+
"Identifiers",
89+
"Networking"
90+
]
91+
},
92+
{
93+
"Name": "Uniform Resource Locator (URL)",
94+
"Regex": "(?i)^(?:(?:(?:https?|ftp):)?\/\/)(?:\\S+(?::\\S*)?@?)?(?:(?!(?:10|127)(?:\\.\\d{1,3}){3})(?!(?:169\\.254|192\\.168)(?:\\.\\d{1,3}){2})(?!172\\.(?:1[6-9]|2\\d|3[0-1])(?:\\.\\d{1,3}){2})(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}(?:\\.(?:[1-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))|(?:(?:[a-z0-9\u00a1-\uffff][a-z0-9\u00a1-\uffff_-]{0,62})?[a-z0-9\u00a1-\uffff]?\\.?)+?(?:[a-z\u00a1-\uffff]+\\.?))(?::\\d{2,5})?(?:[\/?#]\\S*)?$",
95+
"Description": null,
96+
"Rarity": 0.7,
7597
"Tags": [
7698
"Identifiers"
7799
]
78100
},
79101
{
80-
"Name":"Latitude & Longitude Coordinates",
81-
"Regex":"^[-+]?([1-8]?\\d(\\.\\d+)?|90(\\.0+)?),\\s*[-+]?(180(\\.0+)?|((1[0-7]\\d)|([1-9]?\\d))(\\.\\d+)?)$",
82-
"plural_name": true,
83-
"Description": null,
84-
"URL": "https://www.google.com/maps/place/",
85-
"Rarity": 0.5,
86-
"Tags": [
87-
"Geo-location"
102+
"Name": "Internet Protocol (IP) Address Version 4",
103+
"Regex": "(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(?::[0-9]{1,5})?",
104+
"Description": null,
105+
"URL": "https://www.shodan.io/host/",
106+
"Rarity": 0.7,
107+
"Tags": [
108+
"Identifiers",
109+
"Networking"
110+
]
111+
},
112+
{
113+
"Name":"Latitude & Longitude Coordinates",
114+
"Regex":"(?i)^(?:(?:N|W|S|E)\\s?\\d+\\s?\\u00B0?\\s?\\d+\\.?\\d*\\s?\\'?\\s?\\d*\\.?\\,?\\d*?\"?\\s?){1,2}$|^(?:\\d+\\s?\\u00B0\\s?\\d+\\s?\\'\\s?\\d+\\.?\\,?\\d{0,}?\"\\s?(?:N|W|S|E)\\s?){1,2}$|^(?:[-+]?(?:[0-8]?\\d+\\.\\d{4,}|90(?:\\.0+)?),\\s*[-+]?(?:180(?:\\.0+)?|(?:(?:1[0-7]\\d)|(?:[1-9]?\\d))(?:\\.\\d+)?))$",
115+
"plural_name": true,
116+
"Description": null,
117+
"URL": "https://www.google.com/maps/place/",
118+
"Rarity": 0.5,
119+
"Tags": [
120+
"Geo-location"
88121
]
89-
},
122+
},
90123
{
91-
"Name": "Dogecoin Wallet Address",
124+
"Name": "Dogecoin (DOGE) Wallet Address",
92125
"Regex": "^D{1}[5-9A-HJ-NP-U]{1}[1-9A-HJ-NP-Za-km-z]{32}$",
93126
"Description": null,
94127
"URL": "https://dogechain.info/address/",
@@ -98,6 +131,39 @@
98131
"Finance"
99132
]
100133
},
134+
{
135+
"Name": "Monero (XMR) Wallet Address",
136+
"Regex": "(?i)^4(?:[0-9]|[A-B])[a-z0-9]{93}$",
137+
"Description": null,
138+
"URL": "https://dogechain.info/address/",
139+
"Rarity": 0.5,
140+
"Tags": [
141+
"Cryptocurrency",
142+
"Finance"
143+
]
144+
},
145+
{
146+
"Name": "Litecoin (LTC) Wallet Address",
147+
"Regex": "(?i)^(?:L|M)[a-z0-9]{33}$",
148+
"Description": null,
149+
"URL": "https://live.blockcypher.com/ltc/address/",
150+
"Rarity": 0.5,
151+
"Tags": [
152+
"Cryptocurrency",
153+
"Finance"
154+
]
155+
},
156+
{
157+
"Name": "Ripple (XRP) Wallet Address",
158+
"Regex": "(?i)^r[a-z0-9]{33}$",
159+
"Description": null,
160+
"URL": "https://xrpscan.com/account/",
161+
"Rarity": 0.5,
162+
"Tags": [
163+
"Cryptocurrency",
164+
"Finance"
165+
]
166+
},
101167
{
102168
"Name": "American Express Card Number",
103169
"Regex": "^3[47][0-9]{13}$",
@@ -239,24 +305,14 @@
239305
},
240306
{
241307
"Name": "Email Address",
242-
"Regex": "(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\\.[a-zA-Z0-9-.]+$)",
308+
"Regex": "(?i)(^[a-z0-9_.+-]+@[a-z0-9-]+\\.[a-z0-9-.]+$)",
243309
"Description": null,
244310
"Rarity": 0.5,
245311
"Tags": [
246312
"Identifiers",
247313
"Credentials"
248314
]
249315
},
250-
{
251-
"Name": "YouTube Video ID",
252-
"Regex": "^[0-9A-Za-z_-]{10}[048AEIMQUYcgkosw]{1}$",
253-
"Description": null,
254-
"URL": "https://www.youtube.com/watch?v=",
255-
"Rarity": 0.4,
256-
"Tags": [
257-
"Media"
258-
]
259-
},
260316
{
261317
"Name": "YouTube Channel ID",
262318
"Regex": "^UC[0-9A-Za-z_-]{21}[AQgw]{1}$",
@@ -268,13 +324,13 @@
268324
]
269325
},
270326
{
271-
"Name": "Internet Protocol (IP) Address",
272-
"Regex": "(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)",
327+
"Name": "YouTube Video ID",
328+
"Regex": "^(?=.*[A-Z])(?=.*[a-z])[0-9A-Za-z_-]{10}[048AEIMQUYcgkosw]{1}$",
273329
"Description": null,
274-
"Rarity": 0.4,
330+
"URL": "https://www.youtube.com/watch?v=",
331+
"Rarity": 0.2,
275332
"Tags": [
276-
"Identifiers",
277-
"Networking"
333+
"Media"
278334
]
279335
},
280336
{

pywhat/printer.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
import json
22

3-
from rich.console import Console, OverflowMethod
3+
from rich.console import Console
44
from rich.table import Table
55

66

tests/test_click.py

+73-3
Original file line number | Diff line number | Diff line change
@@ -108,7 +108,7 @@ def test_arg_parsing2():
108108
runner = CliRunner()
109109
result = runner.invoke(main, ["http://10.1.1.1"])
110110
assert result.exit_code == 0
111-
assert re.findall("URL", str(result.output))
111+
assert re.findall("Internet Protocol", str(result.output))
112112

113113

114114
def test_file_fixture_visa():
@@ -175,11 +175,32 @@ def test_file_fixture_youtube_id():
175175
assert re.findall("YouTube", str(result.output))
176176

177177

178-
def test_file_fixture_ip():
178+
def test_file_fixture_ip4():
179179
runner = CliRunner()
180180
result = runner.invoke(main, ["fixtures/file"])
181181
assert result.exit_code == 0
182-
assert re.findall("Internet Protocol", str(result.output))
182+
assert re.findall("Address Version 4", str(result.output))
183+
184+
185+
def test_file_fixture_ip4_shodan():
186+
runner = CliRunner()
187+
result = runner.invoke(main, ["118.103.238.230"])
188+
assert result.exit_code == 0
189+
assert re.findall("shodan", str(result.output))
190+
191+
192+
def test_file_fixture_ip6():
193+
runner = CliRunner()
194+
result = runner.invoke(main, ["fixtures/file"])
195+
assert result.exit_code == 0
196+
assert re.findall("Address Version 6", str(result.output))
197+
198+
199+
def test_file_fixture_ip6_shodan():
200+
runner = CliRunner()
201+
result = runner.invoke(main, ["2001:0db8:85a3:0000:0000:8a2e:0370:7334"])
202+
assert result.exit_code == 0
203+
assert re.findall("shodan", str(result.output))
183204

184205

185206
def test_file_fixture_ssn():
@@ -204,6 +225,55 @@ def test_file_coords():
204225
assert re.findall("Latitude", str(result.output))
205226

206227

228+
def test_file_fixture_ltc():
229+
runner = CliRunner()
230+
result = runner.invoke(main, ["fixtures/file"])
231+
assert result.exit_code == 0
232+
assert re.findall("Litecoin", str(result.output))
233+
234+
235+
def test_file_fixture_ltc2():
236+
runner = CliRunner()
237+
result = runner.invoke(main, ["fixtures/file"])
238+
assert result.exit_code == 0
239+
assert re.findall("live.block", str(result.output))
240+
241+
242+
def test_file_fixture_bch():
243+
runner = CliRunner()
244+
result = runner.invoke(main, ["fixtures/file"])
245+
assert result.exit_code == 0
246+
assert re.findall("Bitcoin Cash", str(result.output))
247+
248+
249+
def test_file_fixture_bch2():
250+
runner = CliRunner()
251+
result = runner.invoke(main, ["bitcoincash:qzlg6uvceehgzgtz6phmvy8gtdqyt6vf359at4n3lq"])
252+
assert result.exit_code == 0
253+
assert re.findall("blockchain", str(result.output))
254+
255+
256+
def test_file_fixture_xrp():
257+
runner = CliRunner()
258+
result = runner.invoke(main, ["fixtures/file"])
259+
assert result.exit_code == 0
260+
assert re.findall("Ripple", str(result.output))
261+
262+
263+
def test_file_fixture_xrp2():
264+
runner = CliRunner()
265+
result = runner.invoke(main, ["fixtures/file"])
266+
assert result.exit_code == 0
267+
assert re.findall("xrpscan", str(result.output))
268+
269+
270+
def test_file_fixture_xmr():
271+
runner = CliRunner()
272+
result = runner.invoke(main, ["fixtures/file"])
273+
assert result.exit_code == 0
274+
assert re.findall("Monero", str(result.output))
275+
276+
207277
def test_file_cors():
208278
runner = CliRunner()
209279
result = runner.invoke(main, ["Access-Control-Allow: *"])

tests/test_identifier.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
def test_identifier_works():
55
r = identifier.Identifier()
66
out = r.identify("DANHz6EQVoWyZ9rER56DwTXHWUxfkv9k2o")
7-
assert "Dogecoin Wallet Address" in out["Regexes"][0]["Regex Pattern"]["Name"]
7+
assert "Dogecoin (DOGE) Wallet Address" in out["Regexes"][0]["Regex Pattern"]["Name"]
88

99

1010
def test_identifier_spanish():

0 commit comments

Comments
 (0)