Skip to content

Commit 85ee4a1

Browse files
Decode non-base64 image data URI bytes safely
1 parent 6799a5a commit 85ee4a1

2 files changed

Lines changed: 97 additions & 2 deletions

File tree

OfficeIMO.Html/Images/HtmlImageDataUri.cs

Lines changed: 88 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
using System.IO;
12
using System.Text;
23

34
namespace OfficeIMO.Html;
@@ -74,7 +75,7 @@ public static bool TryParse(string? source, out HtmlImageDataUri dataUri) {
7475
/// </summary>
7576
public byte[] DecodeBytes() {
7677
if (!IsBase64) {
77-
return Encoding.UTF8.GetBytes(Uri.UnescapeDataString(Data));
78+
return DecodePercentEncodedBytes(Data);
7879
}
7980

8081
string payload = NormalizeBase64Payload(Uri.UnescapeDataString(Data));
@@ -110,7 +111,7 @@ public string DecodeText() {
110111
/// </summary>
111112
public long EstimateDecodedByteCount() {
112113
if (!IsBase64) {
113-
return Encoding.UTF8.GetByteCount(Uri.UnescapeDataString(Data));
114+
return CountPercentDecodedBytes(Data);
114115
}
115116

116117
string payload = NormalizeBase64Payload(Uri.UnescapeDataString(Data));
@@ -127,6 +128,91 @@ public long EstimateDecodedByteCount() {
127128
return (long)Math.Ceiling(length / 4D) * 3L - padding;
128129
}
129130

131+
/// <summary>
/// Decodes a non-base64 data URI payload into raw bytes.
/// </summary>
/// <remarks>
/// Each <c>%XX</c> escape contributes exactly one byte, while runs of literal
/// characters between escapes are written as their UTF-8 encoding.
/// </remarks>
/// <param name="data">The payload portion of the data URI.</param>
/// <returns>The decoded bytes, in payload order.</returns>
/// <exception cref="UriFormatException">A <c>%</c> is not followed by two hexadecimal digits.</exception>
private static byte[] DecodePercentEncodedBytes(string data) {
    using var output = new MemoryStream();
    StringBuilder? literalRun = null;
    int index = 0;
    while (index < data.Length) {
        char current = data[index];
        if (current != '%') {
            // Literal characters are buffered so that a whole run is encoded at once.
            literalRun ??= new StringBuilder();
            literalRun.Append(current);
            index++;
            continue;
        }

        // The pending literal run must be written before the escaped byte to keep byte order.
        FlushTextBytes(literalRun, output);
        literalRun?.Clear();
        output.WriteByte(ReadEscapedByte(data, index));

        // Step over the '%' and its two hex digits.
        index += 3;
    }

    FlushTextBytes(literalRun, output);
    return output.ToArray();
}
151+
152+
/// <summary>
/// Computes how many bytes a non-base64 data URI payload decodes to, without building the byte array.
/// </summary>
/// <remarks>
/// Each <c>%XX</c> escape counts as one byte; every run of literal characters between
/// escapes counts as its UTF-8 byte length.
/// </remarks>
/// <param name="data">The payload portion of the data URI.</param>
/// <returns>The number of decoded bytes.</returns>
/// <exception cref="UriFormatException">A <c>%</c> is not followed by two hexadecimal digits.</exception>
private static long CountPercentDecodedBytes(string data) {
    long total = 0;
    int runStart = 0;
    int position = 0;
    while (position < data.Length) {
        if (data[position] != '%') {
            position++;
            continue;
        }

        // Account for the literal run that ends just before this escape.
        int runLength = position - runStart;
        if (runLength > 0) {
            total += Encoding.UTF8.GetByteCount(data.Substring(runStart, runLength));
        }

        // Called only for validation: throws when the escape is malformed.
        _ = ReadEscapedByte(data, position);
        total++;

        // Continue right after the '%' and its two hex digits; the next run starts there.
        position += 3;
        runStart = position;
    }

    // Trailing literal run after the last escape (or the whole payload when there is none).
    if (runStart < data.Length) {
        total += Encoding.UTF8.GetByteCount(data.Substring(runStart));
    }

    return total;
}
176+
177+
/// <summary>
/// Writes the UTF-8 encoding of the buffered text to <paramref name="stream"/>.
/// </summary>
/// <remarks>
/// Does nothing when the buffer is <c>null</c> or empty. The buffer itself is not cleared here.
/// </remarks>
/// <param name="text">The buffered literal characters, or <c>null</c> when none were collected.</param>
/// <param name="stream">The stream that receives the encoded bytes.</param>
private static void FlushTextBytes(StringBuilder? text, MemoryStream stream) {
    if (text is { Length: > 0 }) {
        byte[] encoded = Encoding.UTF8.GetBytes(text.ToString());
        stream.Write(encoded, 0, encoded.Length);
    }
}
185+
186+
/// <summary>
/// Reads the byte encoded by the <c>%XX</c> escape that starts at <paramref name="percentIndex"/>.
/// </summary>
/// <param name="data">The payload containing the escape.</param>
/// <param name="percentIndex">The index of the <c>%</c> character in <paramref name="data"/>.</param>
/// <returns>The byte formed from the two hexadecimal digits following the <c>%</c>.</returns>
/// <exception cref="UriFormatException">
/// Fewer than two characters follow the <c>%</c>, or either of them is not a hexadecimal digit.
/// </exception>
private static byte ReadEscapedByte(string data, int percentIndex) {
    int highIndex = percentIndex + 1;
    int lowIndex = percentIndex + 2;

    // The bounds check comes first so neither digit is read past the end of the payload.
    if (lowIndex < data.Length
        && TryReadHex(data[highIndex], out byte upper)
        && TryReadHex(data[lowIndex], out byte lower)) {
        return (byte)((upper << 4) | lower);
    }

    throw new UriFormatException("Invalid percent escape in data URI payload.");
}
195+
196+
/// <summary>
/// Converts a single ASCII hexadecimal digit to its numeric value.
/// </summary>
/// <param name="value">The character to interpret; <c>0-9</c>, <c>A-F</c> and <c>a-f</c> are accepted.</param>
/// <param name="nibble">The digit's value (0-15) on success; 0 when the character is not a hex digit.</param>
/// <returns><c>true</c> when <paramref name="value"/> is a hexadecimal digit; otherwise <c>false</c>.</returns>
private static bool TryReadHex(char value, out byte nibble) {
    // -1 marks "not a hex digit"; every accepted character maps to 0..15.
    int digit = value switch {
        >= '0' and <= '9' => value - '0',
        >= 'A' and <= 'F' => value - 'A' + 10,
        >= 'a' and <= 'f' => value - 'a' + 10,
        _ => -1,
    };

    bool isHex = digit >= 0;
    nibble = isHex ? (byte)digit : (byte)0;
    return isHex;
}
215+
130216
private static string GetDataUriContentType(string metadata) {
131217
if (string.IsNullOrWhiteSpace(metadata)) {
132218
return string.Empty;

OfficeIMO.Tests/Html.Core.cs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,15 @@ public void HtmlImageDataUri_TryDecodeBytesReturnsFalseForBadEscapes() {
235235
Assert.Empty(bytes);
236236
}
237237

238+
[Fact]
public void HtmlImageDataUri_DecodesNonBase64PercentEscapesAsBytes() {
    // The payload mixes percent escapes with literal ASCII; 0x89 is not valid UTF-8 on its own,
    // so the expected output only matches when escapes are decoded as raw bytes.
    const string source = "data:image/png,%89PNG%0D%0A%1A%0A";
    byte[] expected = { 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A };

    bool parsed = HtmlImageDataUri.TryParse(source, out var image);

    Assert.True(parsed);
    Assert.False(image.IsBase64);
    Assert.Equal(expected, image.DecodeBytes());
    Assert.Equal(8, image.EstimateDecodedByteCount());
}
246+
238247
[Fact]
239248
public void HtmlImageDataUri_MatchesOnlyExactBase64Flag() {
240249
string svg = "<svg xmlns=\"http://www.w3.org/2000/svg\"/>";

0 commit comments

Comments
 (0)