Skip to content

Commit 0c2bc2b

Browse files
committed
Fix an issue when reading images in multithreading and where the stream get disposed too soon.
1 parent 88be1c4 commit 0c2bc2b

File tree

2 files changed

+76
-75
lines changed

2 files changed

+76
-75
lines changed

src/Html2OpenXml/IO/ImagePrefetcher.cs

Lines changed: 45 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ sealed class ImagePrefetcher<T> : IImageLoader
5252
private readonly T hostingPart;
5353
private readonly IWebRequest resourceLoader;
5454
private readonly HtmlImageInfoCollection prefetchedImages;
55-
private readonly object lockObject = new object();
55+
private readonly object lockObject = new();
5656
private readonly ImageProcessingMode processingMode;
5757

5858

@@ -68,7 +68,7 @@ public ImagePrefetcher(T hostingPart, IWebRequest resourceLoader, ImageProcessin
6868
this.hostingPart = hostingPart;
6969
this.resourceLoader = resourceLoader;
7070
this.processingMode = processingMode;
71-
this.prefetchedImages = new HtmlImageInfoCollection();
71+
this.prefetchedImages = [];
7272
}
7373

7474
//____________________________________________________________________
@@ -133,65 +133,38 @@ public ImagePrefetcher(T hostingPart, IWebRequest resourceLoader, ImageProcessin
133133
/// </summary>
134134
private async Task<HtmlImageInfo?> DownloadRemoteImage(string src, CancellationToken cancellationToken)
135135
{
136-
Uri imageUri = new Uri(src, UriKind.RelativeOrAbsolute);
136+
Uri imageUri = new(src, UriKind.RelativeOrAbsolute);
137137
if (imageUri.IsAbsoluteUri && !resourceLoader.SupportsProtocol(imageUri.Scheme))
138138
return null;
139139

140-
Resource? response;
141-
142-
response = await resourceLoader.FetchAsync(imageUri, cancellationToken).ConfigureAwait(false);
140+
using var response = await resourceLoader.FetchAsync(imageUri, cancellationToken).ConfigureAwait(false);
143141
if (response?.Content == null)
144142
return null;
145143

146-
using (response)
144+
// Copy the stream into a MemoryStream to avoid ObjectDisposedException
145+
using var buffer = new MemoryStream();
146+
response.Content.CopyTo(buffer);
147+
buffer.Seek(0, SeekOrigin.Begin);
148+
149+
// For requested url with no filename, we need to read the media mime type if provided
150+
response.Headers.TryGetValue("Content-Type", out var mime);
151+
if (!TryInspectMimeType(mime, out PartTypeInfo type)
152+
&& !TryGuessTypeFromUri(imageUri, out type)
153+
&& !TryGuessTypeFromStream(buffer, out type)
154+
)
147155
{
148-
// For requested url with no filename, we need to read the media mime type if provided
149-
response.Headers.TryGetValue("Content-Type", out var mime);
150-
if (!TryInspectMimeType(mime, out PartTypeInfo type)
151-
&& !TryGuessTypeFromUri(imageUri, out type)
152-
&& !TryGuessTypeFromStream(response.Content, out type))
153-
{
154-
return null;
155-
}
156-
157-
// Generate a unique GUID-based relationship ID for the image part
158-
string relationshipId = "img_" + Guid.NewGuid().ToString("N");
159-
160-
// Synchronize access to AddImagePart to prevent concurrent modifications to the OpenXml document
161-
ImagePart ipart;
162-
lock (lockObject)
163-
{
164-
ipart = hostingPart.AddImagePart(type, relationshipId);
165-
}
166-
167-
Size originalSize;
168-
using (var outputStream = ipart.GetStream(FileMode.Create))
169-
{
170-
response.Content.CopyTo(outputStream);
171-
172-
outputStream.Seek(0L, SeekOrigin.Begin);
173-
originalSize = GetImageSize(outputStream);
174-
}
175-
176-
string partId;
177-
lock (lockObject)
178-
{
179-
partId = hostingPart.GetIdOfPart(ipart);
180-
}
181-
182-
return new HtmlImageInfo(src, partId) {
183-
TypeInfo = type,
184-
Size = originalSize
185-
};
156+
return null;
186157
}
158+
159+
return SaveImageAssert(src, type, buffer.CopyTo);
187160
}
188161

189162
/// <summary>
190163
/// Create an external relationship to an image without downloading it.
191164
/// </summary>
192165
private HtmlImageInfo? CreateExternalImageLink(string src)
193166
{
194-
Uri imageUri = new Uri(src, UriKind.RelativeOrAbsolute);
167+
Uri imageUri = new(src, UriKind.RelativeOrAbsolute);
195168

196169
// Resolve relative URIs if possible (only for DefaultWebRequest which has BaseImageUrl)
197170
if (!imageUri.IsAbsoluteUri && resourceLoader is DefaultWebRequest defaultWebRequest
@@ -220,8 +193,7 @@ public ImagePrefetcher(T hostingPart, IWebRequest resourceLoader, ImageProcessin
220193

221194
// Return image info with external flag set
222195
// Note: Size will be empty as we don't download the image
223-
return new HtmlImageInfo(src, relationshipId)
224-
{
196+
return new HtmlImageInfo(src, relationshipId) {
225197
IsExternal = true,
226198
Size = Size.Empty,
227199
TypeInfo = ImagePartType.Png // Default type, actual type doesn't matter for external links
@@ -235,39 +207,37 @@ public ImagePrefetcher(T hostingPart, IWebRequest resourceLoader, ImageProcessin
235207
{
236208
if (DataUri.TryCreate(src, out var dataUri))
237209
{
238-
Size originalSize;
239210
knownContentType.TryGetValue(dataUri!.Mime, out PartTypeInfo type);
240-
// Generate a unique GUID-based relationship ID for the image part
241-
string relationshipId = "img_" + Guid.NewGuid().ToString("N");
242-
243-
// Synchronize access to AddImagePart to prevent concurrent modifications to the OpenXml document
244-
ImagePart ipart;
245-
lock (lockObject)
246-
{
247-
ipart = hostingPart.AddImagePart(type, relationshipId);
248-
}
249-
250-
using (var outputStream = ipart.GetStream(FileMode.Create))
251-
{
252-
outputStream.Write(dataUri.Data, 0, dataUri.Data.Length);
253211

254-
outputStream.Seek(0L, SeekOrigin.Begin);
255-
originalSize = GetImageSize(outputStream);
256-
}
212+
return SaveImageAssert(src, type, stream => stream.Write(dataUri.Data, 0, dataUri.Data.Length));
213+
}
257214

258-
string partId;
259-
lock (lockObject)
260-
{
261-
partId = hostingPart.GetIdOfPart(ipart);
262-
}
215+
return null;
216+
}
263217

264-
return new HtmlImageInfo(src, partId) {
265-
TypeInfo = type,
266-
Size = originalSize
267-
};
218+
private HtmlImageInfo SaveImageAssert(string src, PartTypeInfo type, Action<Stream> writeImage)
219+
{
220+
ImagePart ipart;
221+
string relationshipId = "img_" + Guid.NewGuid().ToString("N");
222+
lock (lockObject)
223+
{
224+
ipart = hostingPart.AddImagePart(type, relationshipId);
268225
}
269226

270-
return null;
227+
Size originalSize;
228+
using (var outputStream = ipart.GetStream(FileMode.Create))
229+
{
230+
writeImage(outputStream);
231+
outputStream.Seek(0L, SeekOrigin.Begin);
232+
originalSize = GetImageSize(outputStream);
233+
}
234+
235+
string partId = hostingPart.GetIdOfPart(ipart);
236+
return new HtmlImageInfo(src, partId)
237+
{
238+
TypeInfo = type,
239+
Size = originalSize
240+
};
271241
}
272242

273243
//____________________________________________________________________

test/HtmlToOpenXml.Tests/ImgTests.cs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,37 @@ await converter.ParseBody(
422422
Assert.That(mainPart.ImageParts.Count(), Is.EqualTo(1));
423423
}
424424

425+
[Test(Description = "Prevent concurrent access to the OpenXml mainPart")]
426+
public async Task MultipleImgInTag_ReturnsTwoDistinctImage()
427+
{
428+
var webRequest = new Mock<IO.IWebRequest>();
429+
webRequest.Setup(x => x.SupportsProtocol(It.IsAny<string>())).Returns(true);
430+
webRequest.Setup(x => x.FetchAsync(It.IsAny<Uri>(), It.IsAny<CancellationToken>()))
431+
.Returns(Task.FromResult<IO.Resource?>(new() {
432+
Content = new MemoryStream(Convert.FromBase64String(@"/9j/4AAQSkZJRgABAQAAAQABAAD/4gKgSUNDX1BST0ZJTEUAAQEAAAKQbGNtcwQwAABtbnRyUkdCIFhZWiAH4QAHAAEAAAABAAZhY3NwQVBQTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA9tYAAQAAAADTLWxjbXMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAtkZXNjAAABCAAAADhjcHJ0AAABQAAAAE53dHB0AAABkAAAABRjaGFkAAABpAAAACxyWFlaAAAB0AAAABRiWFlaAAAB5AAAABRnWFlaAAAB+AAAABRyVFJDAAACDAAAACBnVFJDAAACLAAAACBiVFJDAAACTAAAACBjaHJtAAACbAAAACRtbHVjAAAAAAAAAAEAAAAMZW5VUwAAABwAAAAcAHMAUgBHAEIAIABiAHUAaQBsAHQALQBpAG4AAG1sdWMAAAAAAAAAAQAAAAxlblVTAAAAMgAAABwATgBvACAAYwBvAHAAeQByAGkAZwBoAHQALAAgAHUAcwBlACAAZgByAGUAZQBsAHkAAAAAWFlaIAAAAAAAAPbWAAEAAAAA0y1zZjMyAAAAAAABDEoAAAXj///zKgAAB5sAAP2H///7ov///aMAAAPYAADAlFhZWiAAAAAAAABvlAAAOO4AAAOQWFlaIAAAAAAAACSdAAAPgwAAtr5YWVogAAAAAAAAYqUAALeQAAAY3nBhcmEAAAAAAAMAAAACZmYAAPKnAAANWQAAE9AAAApbcGFyYQAAAAAAAwAAAAJmZgAA8qcAAA1ZAAAT0AAACltwYXJhAAAAAAADAAAAAmZmAADypwAADVkAABPQAAAKW2Nocm0AAAAAAAMAAAAAo9cAAFR7AABMzQAAmZoAACZmAAAPXP/bAEMABQMEBAQDBQQEBAUFBQYHDAgHBwcHDwsLCQwRDxISEQ8RERMWHBcTFBoVEREYIRgaHR0fHx8TFyIkIh4kHB4fHv/bAEMBBQUFBwYHDggIDh4UERQeHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHv/CABEIAX4BfgMBIgACEQEDEQH/xAAcAAEBAAIDAQEAAAAAAAAAAAAAAQcIBAUGAgP/xAAUAQEAAAAAAAAAAAAAAAAAAAAA/9oADAMBAAIQAxAAAAHMoAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADxmKzYPj6q8c22/fUTuzaFh3KZzwAAAAAAAAAAAAAAAAAAflhJjUlAlIonZ9ZTZH1epuyB6IAAAAAAAAAAAAAAAADxvstezwv1BYFlhYFIX1PlRt19+O9iAAAAAAAAAAAAAAAAfOpu0+px9AlCUIsFCKMrZmwFns+gAAAAAAAAAAAAAAAfhqXt3rAdDYKgsCoKQqDI2dcUZYKAAAAAAAAAAAAAAABiPLnENTna9UShKEUATk8fJ5kn0nH5AAAAAAAAAAAAAAAAAB5jXbbDzxrHfWeSFQWBeTkg6LOTtSgAAAAAAAAAAAAAAAAAA/Pyvrhhvp89/Jgv0OU/o873fIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAH//EACYQAAEEAQQBBAMBAAAAAAAAAAQBAgMFBgARMFASFCAxkBMVIiP/2gAIAQEAAQUC+xZXI3XqhtMmif1lvkINep+TWpOpZppdbaT+VDtrERazMF3EKgLh6Z7msbkeSSkr7684qvnoLiG1g6XMrlSJuEQiYQijso7ILo8ssf19Xx4xYOr7Ni+TeizYv1N1x7axUz1dV0Ll2QuVZy+TAZtk6E534wm/HJhDlQ5PjoJm+cPirV5MIZ/u346HIhlEuuTCh1YF0Wfg7t4xYXEEVECRRdEVBGTBbASVx3FilarUgZ4M6O/qYrQU0WcMjgoKhxDgoPBOlt6wWyhtqA0F3tGHmJfT4+jHCCozqHsa5LCjCL0TibNPxYnePF36ExoVijVzY2xxNYnV+Ka8U+xj/8QAFBEBAAAAAAAAAAAAAAAAAAAAkP/aAAgBAwEBPwEQP//EABQRAQAAAAAAAAAAAAAAAAAAAJD/2gAIAQIBAT8BED//xAA7EAABAgIGBAoJBQEAAAAAAAABAgMAEQQSITFBUCIwUXEFEBMjMlJhYpHBIDNCU3KQobHhFDSBgpLR/9oACAEBAAY/AvmLaRA3x+4Z/wBiNFxB3HLC3W5d7qN4bzBDbgoyNjYt8THOvOOnvKnF30iYmD2RzNMdl1VGY8IqcIsWe8aH3EB2jupcQcU5OVKIAFpMGjcHrU2z7TgvXu2ajlqK6UHEYK3xZoPJ6beTK4PoypMoscI9s7N2qQ+wsoWk2Ql5Nirlp2HJDyapPu6DfZtPFdqk28y7oL8jE8jU0OhRxUG+861lSjNQFVW8ZFOHnyfWLUrxOtpDPereI/GRPr6raj9NXjxvDuD75EtHWEoqm8WcV3oXem+v4RkdKawr1huNutC/erreQyNrhBA6Og55axthF6zKEpSJJQJDI1sOiaFiRhdFdwtSqXSG3V/qnEmusaPYmJZJUVouJ9WuV34g0ekIqrH13akUikIk17KZdP8AEVjfk1R9EzgrEQSlJfa6yRb/ACPSqMNKWezCA5SpOrwTgP8AsTN+UWiJrYTPaLD4xzTzqd9sWUlEvgjTpR/qiBWbW8e+fKAkJSlIwAlFgyy6LvmMf//EACkQAAIBAQcEAQUBAAAAAAAAAAABESExQVFhcZGhUIGx8OEQQMHR8ZD/2gAIAQEAAT8h/wA710aRVOqD0j8nAesp0ufpLhst7S5eQ8aTLVnWnNQO5zpvy2RwbP0T8iHAjOKvu+ptBFqSdopqvw+xYb1Zw8Hk+jpKtIcJK+pYmhQeYnLyQlFElt8FlqJImzwX/BfVF1CzGsqpYLf+Jo0NjWPWdVmsU+jNgsSzo26HL0Ikj2GU9RGa5Ix8EZ8M9sIzQlrsSBzPD7p4p4FtH1tb9aX6dEbWBzP0Vy0JRSB5CvqGpu4KwWXfRqbuCq/RTDyVHC2oosVXwPgShehva3BqXob+OxGuxuQ89jcinwVIyexuJR/Dc3KrUy0AOdroN97ehIc1xkgExGa+DQ0Njvwd+D2z6aMpA/o6QVF4DF3QaI/gGYlNXYvBGFexGTIz4Y8yJvWwlqR7BrJF8kRaIvkXcUOYcmqDTLoK2ENNbuoGzVVbUsaoyrdTRNivqIk7HZsRlwZEUEqWIrgivqHuFT9jEhV0FjQpp2p5DzBGRDuIIuggh4EQRAkQQ8CMUMUqs7FH7BWdCdpu7m633cdy8g9sKE3/AE9sL5JpaXECCKVHFive1SPsOmlC+zf2Cjb9eQrJ6UbZa/jIIzZKIvngjEiHayUQrZVMiNSFbJZ+ErVcbV+NRKk+0X2FDU91TfB4tgMk5eiXNr08S+3wb/TRLZFt3gri/e5ZUlJWIe6Rb1ZmvT0FwvRlEVsqPxT9QiZLKYtWe6lCh2NPv8kMrd5I9n5IfrOKJtTbhd2QTGHe+eLgQpYSSp0eBIHzev8AyVTw9pfwxHuxsnw2PPvfmY3M4EbIR6akxEeR0uExttTYSbmwklYv9F//2gAMAwEAAgADAAAAEPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPOEPPPPPPPPPPPPPPPPPPKOEGPPPPPPPPPPPPPPPPPPPCAAAEPPPPPPPPPPPPPPPPPPLIAAAAEHPPPPPPPPPPPPPPPPFAAABAABPPPPPPPPPPPPPPPPBAAEAAABPPPPPPPPOPPPPPPPLCAAAAFPPPPPPPPPPHPPPPPPPHCAAAJPPPPPPPPPPPPPPPPPPPDLDPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPOPPPPPPPPPPPPPPPPPPPPPPPDPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPKPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP/EABQRAQAAAAAAAAAAAAAAAAAAAJD/2gAIAQMBAT8QED//xAAUEQEAAAAAAAAAAAAAAAAAAACQ/9oACAECAQE/EBA//8QAKxAAAQIDBgUFAQEAAAAAAAAAAQARECExIEFQcZHwUWGB0fEwkKGx4UDB/9oACAEBAAE/EPcVFib9eIQprKgdJjoC+FDEzxCNghCi8ZAABmUKmHgw60qRAr29L1UAZaD5iCF/QZQDg4BhRWEVRQI8k9OZbJzKRuQgAobAUgFw5oQgArA6aeDWDdEmcxPW4CbDAFGouiAfaeRCGDirM/QEazHUgX4HNgh9n/WQDCADYQEjkjIaEN2JTzYnEVTVIwMiE1EnaBJQaAGgTQA0EMjIeBEXbOkYDVgVA4HKdAFxiAX/AEiDbApZAPsLiGAhzNCQEngp5EA2FZhBM1oEBT+F5gmnkwEAoLjfAY7sDmWdgA1RA2daHCFshEhnZDYEKQa73fHQjAGReR6KBAGQe7wu1KjgXLO/9RHRgPhEMAprWEOeN9Eh3QmfZr+QAU9cvFp3kiSDtswrwDYAzFsAAADSaU9+AIAZNsEGZWJHT3AhVaoEDcEKfMQAxAEM6J0yBXCYMQsCSdEkkDgACCSP9K2BPlADCLqEAB0wAs1A7DogDMHcCCA6egIcoAggieI4BucuMo4wQrApAUZBhiCGoCraShe4xMP/2Q==")),
433+
StatusCode = System.Net.HttpStatusCode.OK
434+
}));
435+
converter = new HtmlConverter(mainPart, webRequest.Object);
436+
437+
Assert.DoesNotThrowAsync(async () => {
438+
await converter.ParseBody(@"
439+
<table>
440+
<tr>
441+
<td>
442+
<img src='https://domain/a.png'>
443+
<img src='https://domain/b.png'>
444+
</td>
445+
</tr>
446+
</table>");
447+
}, "Should not raise ObjectDisposedException");
448+
449+
webRequest.Verify(s => s.FetchAsync(
450+
It.IsAny<Uri>(),
451+
It.IsAny<CancellationToken>()),
452+
Times.Exactly(2));
453+
Assert.That(mainPart.ImageParts.Count(), Is.EqualTo(2));
454+
}
455+
425456
private static (Drawing, ImagePart) AssertIsImg(OpenXmlPartContainer container, OpenXmlElement paragraph)
426457
{
427458
var run = paragraph.GetFirstChild<Run>();

0 commit comments

Comments
 (0)