Skip to content

Commit 722644b

Browse files
authored
Handle inline favicons (#4047, #3891)
1 parent 0dbfb02 commit 722644b

File tree

4 files changed

+154
-28
lines changed

4 files changed

+154
-28
lines changed

changedetectionio/content_fetchers/res/favicon-fetcher.js

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,26 +38,39 @@
3838
if (a.size !== b.size) {
3939
return b.size - a.size;
4040
}
41-
41+
4242
// Second priority: apple-touch-icon over regular icon
4343
const isAppleA = /apple-touch-icon/.test(a.rel);
4444
const isAppleB = /apple-touch-icon/.test(b.rel);
4545
if (isAppleA && !isAppleB) return -1;
4646
if (!isAppleA && isAppleB) return 1;
47-
47+
4848
// Third priority: icons with no size attribute (fallback icons) last
4949
const hasNoSizeA = !a.hasSizes;
5050
const hasNoSizeB = !b.hasSizes;
5151
if (hasNoSizeA && !hasNoSizeB) return 1;
5252
if (!hasNoSizeA && hasNoSizeB) return -1;
53-
53+
5454
return 0;
5555
});
5656

5757
const timeoutMs = 2000;
58+
// 1 MB — matches the server-side limit in bump_favicon()
59+
const MAX_BYTES = 1 * 1024 * 1024;
5860

5961
for (const icon of icons) {
6062
try {
63+
// Inline data URI — no network fetch needed, data is already here
64+
if (icon.href.startsWith('data:')) {
65+
const match = icon.href.match(/^data:([^;]+);base64,([A-Za-z0-9+/=]+)$/);
66+
if (!match) continue;
67+
const mime_type = match[1];
68+
const base64 = match[2];
69+
// Rough size check: base64 is ~4/3 the binary size
70+
if (base64.length * 0.75 > MAX_BYTES) continue;
71+
return { url: icon.href, mime_type, base64 };
72+
}
73+
6174
const controller = new AbortController();
6275
const timeout = setTimeout(() => controller.abort(), timeoutMs);
6376

@@ -74,12 +87,15 @@
7487

7588
const blob = await resp.blob();
7689

90+
if (blob.size > MAX_BYTES) continue;
91+
7792
// Convert blob to base64
7893
const reader = new FileReader();
7994
return await new Promise(resolve => {
8095
reader.onloadend = () => {
8196
resolve({
8297
url: icon.href,
98+
mime_type: blob.type,
8399
base64: reader.result.split(",")[1]
84100
});
85101
};
@@ -98,4 +114,3 @@
98114
// Auto-execute and return result for page.evaluate()
99115
return await window.getFaviconAsBlob();
100116
})();
101-

changedetectionio/model/Watch.py

Lines changed: 54 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -798,24 +798,50 @@ def favicon_is_expired(self):
798798
# Also in the case that the file didn't exist
799799
return True
800800

801-
def bump_favicon(self, url, favicon_base_64: str) -> None:
801+
def bump_favicon(self, url, favicon_base_64: str, mime_type: str = None) -> None:
802802
from urllib.parse import urlparse
803803
import base64
804804
import binascii
805-
decoded = None
805+
import re
806+
807+
MAX_FAVICON_BYTES = 1 * 1024 * 1024 # 1 MB
808+
809+
MIME_TO_EXT = {
810+
'image/png': 'png',
811+
'image/x-icon': 'ico',
812+
'image/vnd.microsoft.icon': 'ico',
813+
'image/jpeg': 'jpg',
814+
'image/gif': 'gif',
815+
'image/svg+xml': 'svg',
816+
'image/webp': 'webp',
817+
'image/bmp': 'bmp',
818+
}
819+
820+
extension = None
821+
822+
# If the caller already resolved the MIME type (e.g. from blob.type or a data URI),
823+
# use that directly — it's more reliable than guessing from a URL path.
824+
if mime_type:
825+
extension = MIME_TO_EXT.get(mime_type.lower().split(';')[0].strip(), None)
806826

807-
if url:
827+
# Fall back to extracting extension from URL path, unless it's a data URI.
828+
if not extension and url and not url.startswith('data:'):
808829
try:
809830
parsed = urlparse(url)
810831
filename = os.path.basename(parsed.path)
811-
(base, extension) = filename.lower().strip().rsplit('.', 1)
832+
(_base, ext) = filename.lower().strip().rsplit('.', 1)
833+
extension = ext
812834
except ValueError:
813-
logger.error(f"UUID: {self.get('uuid')} Cant work out file extension from '{url}'")
814-
return None
815-
else:
816-
# Assume favicon.ico
817-
base = "favicon"
818-
extension = "ico"
835+
logger.warning(f"UUID: {self.get('uuid')} Cant work out file extension from '{url}', defaulting to ico")
836+
837+
# Handle data URIs: extract MIME type from the URI itself when not already known
838+
if not extension and url and url.startswith('data:'):
839+
m = re.match(r'^data:([^;]+);base64,', url)
840+
if m:
841+
extension = MIME_TO_EXT.get(m.group(1).lower(), None)
842+
843+
if not extension:
844+
extension = 'ico'
819845

820846
fname = os.path.join(self.data_dir, f"favicon.{extension}")
821847

@@ -824,22 +850,27 @@ def bump_favicon(self, url, favicon_base_64: str) -> None:
824850
decoded = base64.b64decode(favicon_base_64, validate=True)
825851
except (binascii.Error, ValueError) as e:
826852
logger.warning(f"UUID: {self.get('uuid')} FavIcon save data (Base64) corrupt? {str(e)}")
827-
else:
828-
if decoded:
829-
try:
830-
with open(fname, 'wb') as f:
831-
f.write(decoded)
853+
return None
832854

833-
# Invalidate module-level favicon filename cache for this watch
834-
_FAVICON_FILENAME_CACHE.pop(self.data_dir, None)
855+
if len(decoded) > MAX_FAVICON_BYTES:
856+
logger.warning(f"UUID: {self.get('uuid')} Favicon too large ({len(decoded)} bytes), skipping")
857+
return None
835858

836-
# A signal that could trigger the socket server to update the browser also
837-
watch_check_update = signal('watch_favicon_bump')
838-
if watch_check_update:
839-
watch_check_update.send(watch_uuid=self.get('uuid'))
859+
try:
860+
with open(fname, 'wb') as f:
861+
f.write(decoded)
840862

841-
except Exception as e:
842-
logger.warning(f"UUID: {self.get('uuid')} error saving FavIcon to {fname} - {str(e)}")
863+
# Invalidate module-level favicon filename cache for this watch
864+
_FAVICON_FILENAME_CACHE.pop(self.data_dir, None)
865+
866+
# A signal that could trigger the socket server to update the browser also
867+
watch_check_update = signal('watch_favicon_bump')
868+
if watch_check_update:
869+
watch_check_update.send(watch_uuid=self.get('uuid'))
870+
871+
except Exception as e:
872+
logger.warning(f"UUID: {self.get('uuid')} error saving FavIcon to {fname} - {str(e)}")
873+
return None
843874

844875
# @todo - Store some checksum and only write when its different
845876
logger.debug(f"UUID: {self.get('uuid')} updated favicon to at {fname}")

changedetectionio/tests/test_security.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,85 @@ def test_favicon(client, live_server, measure_memory_usage, datastore_path):
5050
res = client.get(url_for('static_content', group='js', filename='../styles/styles.css'))
5151
assert res.status_code != 200
5252

53+
def test_favicon_inline_data_uri(client, live_server, measure_memory_usage, datastore_path):
54+
"""
55+
bump_favicon() must handle a data URI as the url parameter.
56+
Previously this logged "Cant work out file extension from 'data:image/png;base64,...'" and bailed.
57+
The mime_type from the data URI should be used to pick the correct extension.
58+
"""
59+
import base64
60+
import os
61+
62+
# 1x1 transparent PNG (minimal valid PNG bytes)
63+
PNG_BYTES = (
64+
b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01'
65+
b'\x08\x06\x00\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\nIDATx\x9cc\x00\x01'
66+
b'\x00\x00\x05\x00\x01\r\n-\xb4\x00\x00\x00\x00IEND\xaeB`\x82'
67+
)
68+
png_b64 = base64.b64encode(PNG_BYTES).decode()
69+
data_uri = f"data:image/png;base64,{png_b64}"
70+
71+
uuid = client.application.config.get('DATASTORE').add_watch(url='https://localhost')
72+
watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
73+
74+
# Should NOT raise / bail — must save as favicon.png
75+
watch.bump_favicon(url=data_uri, favicon_base_64=png_b64, mime_type='image/png')
76+
77+
favicon_fname = watch.get_favicon_filename()
78+
assert favicon_fname is not None, "Favicon should have been saved"
79+
assert favicon_fname.endswith('.png'), f"Expected .png extension, got: {favicon_fname}"
80+
81+
full_path = os.path.join(watch.data_dir, favicon_fname)
82+
assert os.path.getsize(full_path) == len(PNG_BYTES)
83+
84+
# Also verify it's served correctly via the static route
85+
res = client.get(url_for('static_content', group='favicon', filename=uuid))
86+
assert res.status_code == 200
87+
assert res.data == PNG_BYTES
88+
89+
90+
def test_favicon_mime_type_overrides_url_extension(client, live_server, measure_memory_usage, datastore_path):
91+
"""
92+
mime_type parameter takes precedence over the URL path extension.
93+
A URL ending in .ico but with mime_type='image/png' should save as .png.
94+
"""
95+
import base64
96+
import os
97+
98+
PNG_BYTES = (
99+
b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01'
100+
b'\x08\x06\x00\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\nIDATx\x9cc\x00\x01'
101+
b'\x00\x00\x05\x00\x01\r\n-\xb4\x00\x00\x00\x00IEND\xaeB`\x82'
102+
)
103+
png_b64 = base64.b64encode(PNG_BYTES).decode()
104+
105+
uuid = client.application.config.get('DATASTORE').add_watch(url='https://localhost')
106+
watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
107+
108+
watch.bump_favicon(url='https://example.com/favicon.ico', favicon_base_64=png_b64, mime_type='image/png')
109+
110+
favicon_fname = watch.get_favicon_filename()
111+
assert favicon_fname is not None
112+
assert favicon_fname.endswith('.png'), f"mime_type should override URL extension, got: {favicon_fname}"
113+
114+
115+
def test_favicon_oversized_rejected(client, live_server, measure_memory_usage, datastore_path):
116+
"""Favicons larger than 1 MB must be rejected without raising: bump_favicon() returns None and no file is written."""
117+
import base64
118+
import os
119+
120+
oversized = b'\x00' * (1 * 1024 * 1024 + 1)
121+
oversized_b64 = base64.b64encode(oversized).decode()
122+
123+
uuid = client.application.config.get('DATASTORE').add_watch(url='https://localhost')
124+
watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
125+
126+
result = watch.bump_favicon(url='https://example.com/big.png', favicon_base_64=oversized_b64, mime_type='image/png')
127+
128+
assert result is None, "bump_favicon should return None for oversized favicon"
129+
assert watch.get_favicon_filename() is None, "No favicon file should have been written"
130+
131+
53132
def test_bad_access(client, live_server, measure_memory_usage, datastore_path):
54133

55134
res = client.post(

changedetectionio/worker.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -484,7 +484,8 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
484484
# Store favicon if necessary
485485
if update_handler.fetcher.favicon_blob and update_handler.fetcher.favicon_blob.get('base64'):
486486
watch.bump_favicon(url=update_handler.fetcher.favicon_blob.get('url'),
487-
favicon_base_64=update_handler.fetcher.favicon_blob.get('base64')
487+
favicon_base_64=update_handler.fetcher.favicon_blob.get('base64'),
488+
mime_type=update_handler.fetcher.favicon_blob.get('mime_type')
488489
)
489490

490491
datastore.update_watch(uuid=uuid, update_obj=final_updates)

0 commit comments

Comments
 (0)