Skip to content

Commit d4bafe4

Browse files
authored
Replace heuristic printable checker with simple UTF-8/ASCII validator (#2245)
1 parent 4b9104b commit d4bafe4

File tree

2 files changed

+25
-107
lines changed

2 files changed

+25
-107
lines changed

src/AudioTools/Communication/HTTP/HttpLineReader.h

Lines changed: 1 addition & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -72,67 +72,7 @@ class HttpLineReader {
7272
}
7373
str[result - 1] = 0;
7474
if (is_buffer_overflow) {
75-
// SAFETY FIX: Don't print potentially corrupted binary data with %s
76-
// Binary garbage can contain terminal escape codes or invalid UTF-8 that crashes Serial.printf()
77-
//
78-
// This fix prevents ESP32 hard resets when HTTP servers send malformed headers
79-
// Real-world trigger: http://fast.citrus3.com:2020/stream/wtmj-radio
80-
//
81-
// Strategy:
82-
// 1. Sanitize the actual buffer (prevents parser poisoning downstream)
83-
// 2. Count printable vs binary content
84-
// 3. Use hex dump for binary garbage (safer than string masking)
85-
// 4. Limit output to 256 bytes (prevents log spam)
86-
87-
int printable = 0;
88-
int non_printable = 0;
89-
int actual_len = 0;
90-
91-
// First pass: count and find actual length
92-
for (int i = 0; i < len && str[i] != 0; i++) {
93-
actual_len = i + 1;
94-
if (str[i] >= 32 && str[i] <= 126) {
95-
printable++;
96-
} else if (str[i] != '\r' && str[i] != '\n' && str[i] != '\t') {
97-
non_printable++;
98-
// CRITICAL: Sanitize the actual buffer to prevent parser poisoning
99-
// Replace binary garbage with space to avoid confusing HTTP header parser
100-
str[i] = ' ';
101-
}
102-
}
103-
104-
// Limit logging output to 256 bytes to prevent excessive serial spam
105-
int log_len = (actual_len > 256) ? 256 : actual_len;
106-
107-
// If mostly binary garbage (>50% non-printable), use hex dump for safety
108-
if (non_printable > printable) {
109-
LOGE("Line cut off: [%d bytes, %d binary chars - showing hex dump of first %d bytes]",
110-
actual_len, non_printable, (log_len > 32 ? 32 : log_len));
111-
112-
// Hex dump (safer than string output - never misinterpreted)
113-
// Show first 32 bytes maximum
114-
int hex_len = (log_len > 32) ? 32 : log_len;
115-
for (int i = 0; i < hex_len; i += 16) {
116-
char hex_line[64];
117-
int line_len = (hex_len - i > 16) ? 16 : (hex_len - i);
118-
int pos = 0;
119-
for (int j = 0; j < line_len; j++) {
120-
pos += snprintf(hex_line + pos, sizeof(hex_line) - pos, "%02X ", (uint8_t)str[i + j]);
121-
}
122-
LOGE(" %04X: %s", i, hex_line);
123-
}
124-
} else {
125-
// Mostly printable - safe to log as string (already sanitized in-place above)
126-
// Truncate to 256 bytes for logging
127-
if (log_len < actual_len) {
128-
char saved = str[log_len];
129-
str[log_len] = 0;
130-
LOGE("Line cut off: %s... [%d more bytes]", str, actual_len - log_len);
131-
str[log_len] = saved;
132-
} else {
133-
LOGE("Line cut off: %s", str);
134-
}
135-
}
75+
LOGE("HttpLineReader %s", "readlnInternal->cut off too long line");
13676
}
13777

13878
return result;

src/AudioTools/CoreAudio/AudioMetaData/MetaDataICY.h

Lines changed: 24 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
#pragma once
2-
#include <ctype.h> //isascii
32
#include "AudioToolsConfig.h"
43
#include "AbstractMetaData.h"
54
#include "AudioTools/CoreAudio/AudioBasic/StrView.h"
65
#include "AudioTools/Communication/HTTP/AbstractURLStream.h"
76

7+
#ifndef AUDIOTOOLS_METADATA_ICY_ASCII_ONLY
8+
#define AUDIOTOOLS_METADATA_ICY_ASCII_ONLY true
9+
#endif
10+
811
namespace audio_tools {
912

1013
/**
@@ -109,12 +112,7 @@ class MetaDataICY : public AbstractMetaData {
109112
metaDataLen = metaSize(ch);
110113
LOGI("metaDataLen: %d", metaDataLen);
111114
if (metaDataLen > 0) {
112-
// Enhanced validation: reject suspiciously large metadata (>4080 bytes = 255*16)
113-
// Also reject extremely small metadata that's unlikely to be valid
114-
if (metaDataLen > 4080 || metaDataLen < 16) {
115-
LOGW("Suspicious metaDataLen %d -> skipping metadata block", metaDataLen);
116-
nextStatus = ProcessData;
117-
} else if (metaDataLen > 200) {
115+
if (metaDataLen > 200) {
118116
LOGI("Unexpected metaDataLen -> processed as data");
119117
nextStatus = ProcessData;
120118
} else {
@@ -170,46 +168,28 @@ class MetaDataICY : public AbstractMetaData {
170168
/// determines the meta data size from the size byte
171169
virtual int metaSize(uint8_t metaSize) { return metaSize * 16; }
172170

173-
inline bool isAscii(uint8_t ch){ return ch < 128;}
174-
175-
/// Make sure that the result is a valid ASCII string with printable characters
176-
/// Enhanced validation to reject corrupted metadata before it affects audio stream
177-
virtual bool isAscii(char* result, int l) {
178-
if (l < 1) return false;
179-
180-
// Check entire metadata string, not just first 10 characters
181-
int printable_count = 0;
182-
int control_count = 0;
183-
171+
/// Make sure that the result is a printable string
172+
virtual bool isPrintable(const char* str, int l) {
173+
int remain = 0;
184174
for (int j = 0; j < l; j++) {
185-
uint8_t ch = (uint8_t)result[j];
186-
187-
// Reject non-ASCII bytes (>= 128)
188-
if (ch >= 128) return false;
189-
190-
// Count printable vs control characters
191-
if (ch >= 32 && ch <= 126) {
192-
printable_count++;
193-
} else if (ch == '\n' || ch == '\r' || ch == '\t' || ch == 0) {
194-
// Allow common control characters
195-
continue;
175+
uint8_t ch = str[j];
176+
if (remain) {
177+
if (ch < 0x80 || ch > 0xbf) return false;
178+
remain--;
196179
} else {
197-
// Unusual control character
198-
control_count++;
180+
if (ch < 0x80) { // ASCII
181+
if (ch != '\n' && ch != '\r' && ch != '\t' && ch < 32 || ch == 127)
182+
return false; // control chars
183+
}
184+
#if !AUDIOTOOLS_METADATA_ICY_ASCII_ONLY
185+
else if (ch >= 0xc2 && ch <= 0xdf) remain = 1;
186+
else if (ch >= 0xe0 && ch <= 0xef) remain = 2;
187+
else if (ch >= 0xf0 && ch <= 0xf4) remain = 3;
188+
#endif
189+
else return false;
199190
}
200191
}
201-
202-
// Require at least 50% printable characters to reject binary garbage
203-
// 50% threshold is the absolute minimum - accepts any ICY padding strategy
204-
// Binary garbage typically has < 30% printable, so 50% provides good separation
205-
// Super CFL: 68.8% (33/48) easily passes
206-
if (printable_count < (l * 0.50)) {
207-
LOGW("Metadata validation failed: only %d/%d printable (%.1f%%)",
208-
printable_count, l, (printable_count * 100.0) / l);
209-
return false;
210-
}
211-
212-
return true;
192+
return remain == 0;
213193
}
214194

215195
/// allocates the memory to store the metadata / we support changing sizes
@@ -226,8 +206,7 @@ class MetaDataICY : public AbstractMetaData {
226206
// CHECK_MEMORY();
227207
TRACED();
228208
metaData[len] = 0;
229-
// Use full validation on entire metadata string, not just first 12 bytes
230-
if (isAscii(metaData, len)) {
209+
if (isPrintable(metaData, len)) {
231210
LOGI("%s", metaData);
232211
StrView meta(metaData, len + 1, len);
233212
int start = meta.indexOf("StreamTitle=");
@@ -247,7 +226,6 @@ class MetaDataICY : public AbstractMetaData {
247226
// Don't print corrupted binary data - could contain terminal control codes
248227
LOGW("Unexpected Data: corrupted metadata block rejected (len=%d)", len);
249228
// Signal corruption to application so it can disconnect/reconnect
250-
// This is critical: metaint boundary is now desynchronized and audio will glitch
251229
if (callback != nullptr) {
252230
callback(Corrupted, nullptr, len);
253231
}

0 commit comments

Comments
 (0)