Skip to content

Commit 94a0660

Browse files
committed
Fixes #5076, manifest creation for non-HTML URLs
1 parent 640f8cf commit 94a0660

4 files changed

Lines changed: 80 additions & 47 deletions

File tree

apps/pwabuilder/Common/HttpClientExtensions.cs

Lines changed: 49 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -82,30 +82,71 @@ public static class HttpClientExtensions
8282
/// <param name="cancelToken">Cancellation token.</param>
8383
/// <returns>The string fetched from the URI.</returns>
8484
/// <exception cref="InvalidOperationException">The response was longer than the max size.</exception>
85-
public static async Task<LimitedReadStreamWithMediaType> GetStreamAsync(this HttpClient client, Uri requestUri, IEnumerable<string> accepts, long maxSizeInBytes, CancellationToken cancelToken)
{
    // The request message is only needed until the response arrives, so dispose it when we leave.
    using var streamRequest = new HttpRequestMessage(HttpMethod.Get, requestUri);

    // Add one Accept header entry per requested media type.
    // A null list is treated as "no preference" rather than crashing the loop.
    foreach (var acceptType in accepts ?? [])
    {
        streamRequest.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue(acceptType));
    }

    // Send it.
    var streamFetch = await client.SendAsync(streamRequest, cancelToken);
    try
    {
        // First, ensure we've got success.
        streamFetch.EnsureSuccessStatusCode();

        // We have a max size, so reject early when the Content-Length header already exceeds it.
        // The header is optional, so a missing value does not fail this check.
        var contentLength = streamFetch.Content.Headers.ContentLength;
        if (contentLength.HasValue && contentLength.Value > maxSizeInBytes)
        {
            throw new InvalidOperationException($"Attempted to fetch {requestUri}, but response content-length header says the response size ({contentLength.Value}) exceeds the maximum allowed size ({maxSizeInBytes}).");
        }

        // Hand the body back as a stream wrapped with the max size and the response's media type.
        var stream = await streamFetch.Content.ReadAsStreamAsync(cancelToken);
        return new LimitedReadStreamWithMediaType(stream, maxSizeInBytes, streamFetch.Content.Headers.ContentType?.MediaType);
    }
    catch
    {
        // Nothing was handed to the caller, so release the response before propagating the failure.
        streamFetch.Dispose();
        throw;
    }
}
114+
115+
/// <summary>
/// Downloads an image from the given URI, sending <c>image/*</c> as the Accept header and enforcing a maximum response size.
/// </summary>
/// <param name="client">The HTTP client used to send the request.</param>
/// <param name="requestUri">The URI of the image to download.</param>
/// <param name="maxSizeInBytes">The largest response size, in bytes, that will be accepted.</param>
/// <param name="cancelToken">Token used to cancel the request.</param>
/// <returns>The response body as a size-limited stream, together with the response's media type.</returns>
/// <exception cref="InvalidOperationException">The response was longer than the max size.</exception>
public static Task<LimitedReadStreamWithMediaType> GetImageAsync(this HttpClient client, Uri requestUri, long maxSizeInBytes, CancellationToken cancelToken)
    => GetStreamAsync(client, requestUri, ["image/*"], maxSizeInBytes, cancelToken);
128+
129+
/// <summary>
/// Verifies that the response's Content-Type header contains the expected media type.
/// Only the response headers are inspected; the body is not read.
/// </summary>
/// <param name="response">The HTTP response whose Content-Type header is checked.</param>
/// <param name="responseContentType">The expected media type, for example <c>text/html</c>. The check passes when the response's media type contains this value, ignoring case.</param>
/// <exception cref="ArgumentNullException"><paramref name="response"/> or <paramref name="responseContentType"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="responseContentType"/> is empty.</exception>
/// <exception cref="HttpRequestException">The response has no Content-Type header, or its media type does not contain <paramref name="responseContentType"/>.</exception>
public static void EnsureContentType(this HttpResponseMessage response, string responseContentType)
{
    ArgumentNullException.ThrowIfNull(response);
    ArgumentException.ThrowIfNullOrEmpty(responseContentType);

    // A response without a Content-Type header cannot be verified, so it is treated as a failure.
    var mediaType = response.Content.Headers.ContentType?.MediaType;
    if (string.IsNullOrEmpty(mediaType))
    {
        throw new HttpRequestException($"Expected a response content-type header of {responseContentType}, but it had none.");
    }

    // Compare against the caller's expected type rather than a hard-coded "text/html".
    // Media type names are case-insensitive, so the comparison ignores case.
    if (!mediaType.Contains(responseContentType, StringComparison.OrdinalIgnoreCase))
    {
        throw new HttpRequestException($"Expected a response content-type header of {responseContentType}, but it only had {mediaType}.");
    }
}
110151

111152
public class LimitedReadStreamWithMediaType : Stream

apps/pwabuilder/Controllers/ManifestsController.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,14 @@ public class ManifestsController : ControllerBase
1414
/// <param name="manifestCreatorService">The manifest creation service.</param>
1515
/// <returns>A new web manifest for the specified site.</returns>
1616
[HttpPost("create")]
public async Task<IActionResult> Create([FromQuery] Uri url, [FromServices] ManifestCreator manifestCreatorService, CancellationToken cancellationToken)
{
    // A non-absolute URI is taken to be a host without a scheme, so it is prefixed with https://.
    var siteUri = url.IsAbsoluteUri
        ? url
        : new Uri($"https://{url}", UriKind.Absolute);

    // Generate the manifest for the site and return it with a 200 response.
    return Ok(await manifestCreatorService.Create(siteUri, cancellationToken));
}
2727
}

apps/pwabuilder/Frontend/src/script/pages/app-report.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -227,10 +227,10 @@ export class AppReport extends LitElement {
227227
}
228228

229229
/**
 * Processes the manifest for the given site, stores the resulting context on this
 * component and in the shared manifest context, then populates the app card from it.
 *
 * @param url The URL of the site being analyzed.
 * @param manifestUrl Optional URL of the site's manifest.
 * @param manifestRaw Optional raw manifest text.
 */
private async applyManifestContext(url: string, manifestUrl?: string, manifestRaw?: string) {
  // Clear the app card loading flag before the manifest is processed.
  this.isAppCardInfoLoading = false;

  const context = await this.processManifest(url, manifestUrl, manifestRaw);
  this.manifestContext = context;
  this.createdManifest = context.isGenerated || false;
  setManifestContext(context);

  await this.populateAppCard(context, manifestUrl);
}
236236

apps/pwabuilder/Services/ManifestCreator.cs

Lines changed: 28 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,12 @@ public ManifestCreator(IHttpClientFactory httpClientFactory, ILogger<ManifestCre
2525
/// Creates a web manifest for the specified site. It will use any metadata on the page to help generate a manifest.
2626
/// </summary>
2727
/// <param name="siteUri">The URL of the site to generate the manifest for.</param>
28+
/// <param name="cancelToken">The cancellation token.</param>
2829
/// <returns>A new web manifest.</returns>
29-
public async Task<WebAppManifest> Create(Uri siteUri)
30+
public async Task<WebAppManifest> Create(Uri siteUri, CancellationToken cancelToken)
3031
{
3132
// Fetch the site
32-
var manifestResult = await this.LoadPage(siteUri)
33+
var manifestResult = await this.LoadPage(siteUri, cancelToken)
3334
.PipeAsync(async html => await CreateManifestFromHtml(siteUri, html));
3435
if (manifestResult.Error != null)
3536
{
@@ -415,9 +416,9 @@ private static string GetMetaTagContent(HtmlNode? head, AttrQuery query, string
415416
return string.IsNullOrWhiteSpace(metaVal) ? fallbackValue : metaVal;
416417
}
417418

418-
/// <summary>
/// Fetches the page at <paramref name="url"/>, requesting <c>text/html</c>, and converts the fetched HTML into a document.
/// </summary>
/// <param name="url">The URL of the page to load.</param>
/// <param name="cancelToken">The cancellation token.</param>
/// <returns>The fetch result piped through <see cref="CreateDocumentFromHtml"/>.</returns>
private async Task<Result<HtmlDocument>> LoadPage(Uri url, CancellationToken cancelToken)
{
    return (await TryFetch(url, ["text/html"], cancelToken)).Pipe(CreateDocumentFromHtml);
}
423424

@@ -437,57 +438,48 @@ private static HtmlDocument CreateDocumentFromHtml(string html)
437438
/// <param name="url"></param>
438439
/// <param name="acceptHeaders"></param>
439440
/// <returns></returns>
440-
private async Task<Result<string>> TryFetch(Uri url, IEnumerable<string> acceptHeaders, CancellationToken cancelToken)
{
    // Upper bound on the HTML we are willing to download: 2 MB.
    const int MaxHtmlSizeInBytes = 2 * 1024 * 1024;

    // First, make sure we have HTML. Read just the headers and make sure the response has the right content-type.
    // The pre-flight request carries the same Accept headers as the real fetch, so a server that
    // negotiates content reports the same Content-Type it would serve for the real request.
    try
    {
        using var headersOnlyRequest = new HttpRequestMessage(HttpMethod.Get, url);
        foreach (var header in acceptHeaders ?? [])
        {
            headersOnlyRequest.Headers.Accept.Add(new System.Net.Http.Headers.MediaTypeWithQualityHeaderValue(header));
        }

        using var headersOnlyResponse = await http.SendAsync(headersOnlyRequest, HttpCompletionOption.ResponseHeadersRead, cancelToken);
        headersOnlyResponse.EnsureSuccessStatusCode();
        headersOnlyResponse.EnsureContentType("text/html");
    }
    catch (Exception contentTypeError)
    {
        // The URL could not be confirmed as HTML, so report empty content instead of failing the whole operation.
        logger.LogError(contentTypeError, "Failed to verify {url} serves HTML content during manifest creation process. Will return an empty manifest.", url);
        return string.Empty;
    }

    try
    {
        var htmlResponse = await http.GetStringAsync(url, acceptHeaders, MaxHtmlSizeInBytes, cancelToken);
        return htmlResponse ?? string.Empty;
    }
    catch (InvalidOperationException invalidOpError) when (invalidOpError.Message.Contains("The character set provided in ContentType is invalid."))
    {
        // Invalid encoding? Sometimes webpages have incorrectly set their charset / content type.
        // See if we can just parse the thing using UTF-8.
        logger.LogWarning(invalidOpError, "Unable to parse using HTTP client due to invalid ContentType. Attempting to parse using UTF-8.");
        return await TryFetchWithForcedUtf8(url, acceptHeaders, cancelToken);
    }
    catch (Exception httpException)
    {
        // Any other failure: retry once through the HTTP/2 fetch path.
        logger.LogWarning(httpException, "Failed to fetch {url} using HTTP client. Falling back to HTTP/2 fetch.", url);
        return await TryFetchWithHttp2Client(url, acceptHeaders, cancelToken);
    }
}
471474

472-
private async Task<Result<string>> TryFetchWithForcedUtf8(Uri url, params string[] acceptHeaders)
475+
private async Task<Result<string>> TryFetchWithForcedUtf8(Uri url, IEnumerable<string> acceptHeaders, CancellationToken cancelToken)
473476
{
474477
try
475478
{
476-
using var httpRequest = new HttpRequestMessage(HttpMethod.Get, url);
477-
if (acceptHeaders != null)
478-
{
479-
foreach (var header in acceptHeaders)
480-
{
481-
httpRequest.Headers.Accept.Add(new System.Net.Http.Headers.MediaTypeWithQualityHeaderValue(header));
482-
}
483-
}
484-
485-
var httpResponse = await http.SendAsync(httpRequest);
486-
httpResponse.EnsureSuccessStatusCode();
487-
var contentBytes = await httpResponse.Content.ReadAsByteArrayAsync();
488-
var responseString = Encoding.UTF8.GetString(contentBytes);
489-
logger.LogInformation("Successfully parsed the HTML using forced UTF-8 mode");
490-
return responseString;
479+
var byteStream = await this.http.GetStreamAsync(url, acceptHeaders, 1024 * 1024 * 2, cancelToken);
480+
using var memStream = new MemoryStream();
481+
await byteStream.CopyToAsync(memStream, cancelToken);
482+
return Encoding.UTF8.GetString(memStream.ToArray());
491483
}
492484
catch (Exception error)
493485
{
@@ -496,7 +488,7 @@ private async Task<Result<string>> TryFetchWithForcedUtf8(Uri url, params string
496488
}
497489
}
498490

499-
private async Task<Result<string>> TryFetchWithHttp2Client(Uri url, params string[] acceptHeaders)
491+
private async Task<Result<string>> TryFetchWithHttp2Client(Uri url, IEnumerable<string> acceptHeaders, CancellationToken cancelToken)
500492
{
501493
try
502494
{

0 commit comments

Comments
 (0)