@@ -25,11 +25,12 @@ public ManifestCreator(IHttpClientFactory httpClientFactory, ILogger<ManifestCre
2525 /// Creates a web manifest for the specified site. It will use any metadata on the page to help generate a manifest.
2626 /// </summary>
2727 /// <param name="siteUri">The URL of the site to generate the manifest for.</param>
28+ /// <param name="cancelToken">The cancellation token.</param>
2829 /// <returns>A new web manifest.</returns>
29- public async Task < WebAppManifest > Create ( Uri siteUri )
30+ public async Task < WebAppManifest > Create ( Uri siteUri , CancellationToken cancelToken )
3031 {
3132 // Fetch the site
32- var manifestResult = await this . LoadPage ( siteUri )
33+ var manifestResult = await this . LoadPage ( siteUri , cancelToken )
3334 . PipeAsync ( async html => await CreateManifestFromHtml ( siteUri , html ) ) ;
3435 if ( manifestResult . Error != null )
3536 {
@@ -415,9 +416,9 @@ private static string GetMetaTagContent(HtmlNode? head, AttrQuery query, string
415416 return string . IsNullOrWhiteSpace ( metaVal ) ? fallbackValue : metaVal ;
416417 }
417418
// Fetches the page at the given URL as HTML text and, via Result.Pipe,
// hands that text to CreateDocumentFromHtml to build the HtmlDocument.
// The cancellation token is forwarded to the fetch.
private async Task<Result<HtmlDocument>> LoadPage(Uri url, CancellationToken cancelToken)
{
    // Only HTML is requested for the page load.
    IEnumerable<string> htmlOnly = ["text/html"];

    Result<string> htmlResult = await this.TryFetch(url, htmlOnly, cancelToken);
    return htmlResult.Pipe(CreateDocumentFromHtml);
}
423424
@@ -437,57 +438,48 @@ private static HtmlDocument CreateDocumentFromHtml(string html)
/// <param name="url">The URL to fetch.</param>
/// <param name="acceptHeaders">Media types to request. They are added to the Accept header of the headers-only probe and passed on to the content fetch and its fallbacks.</param>
/// <param name="cancelToken">Cancellation token forwarded to every HTTP call made here and to the fallback fetches.</param>
/// <returns>
/// The fetched content. An empty string is returned when the headers-only probe fails
/// (non-success status, non-HTML content type, or any other error). If the content fetch
/// itself fails, the result of the forced UTF-8 or HTTP/2 fallback is returned instead.
/// </returns>
private async Task<Result<string>> TryFetch(Uri url, IEnumerable<string> acceptHeaders, CancellationToken cancelToken)
{
    // Upper bound passed to the content fetch: 2MB max HTML.
    const int maxHtmlSizeInBytes = 2 * 1024 * 1024;

    // First, make sure we have HTML. Read just the headers and make sure it has the right content-type response.
    try
    {
        // Send the same Accept headers as the real fetch so that servers doing
        // content negotiation answer the probe the way they will answer the download.
        using var headersOnlyRequest = new HttpRequestMessage(HttpMethod.Get, url);
        foreach (var header in acceptHeaders)
        {
            headersOnlyRequest.Headers.Accept.Add(new System.Net.Http.Headers.MediaTypeWithQualityHeaderValue(header));
        }

        using var headersOnlyResponse = await http.SendAsync(headersOnlyRequest, HttpCompletionOption.ResponseHeadersRead, cancelToken);
        headersOnlyResponse.EnsureSuccessStatusCode();
        headersOnlyResponse.EnsureContentType("text/html");
    }
    catch (Exception contentTypeError)
    {
        // Best-effort: any probe failure yields an empty string rather than an error.
        // NOTE(review): a cancelled token also lands here and is reported as a content-type failure - confirm that is intended.
        logger.LogError(contentTypeError, "Failed to verify {url} serves HTML content during manifest creation process. Will return an empty manifest.", url);
        return string.Empty;
    }

    try
    {
        var htmlResponse = await http.GetStringAsync(url, acceptHeaders, maxHtmlSizeInBytes, cancelToken);
        return htmlResponse ?? string.Empty;
    }
    catch (InvalidOperationException invalidOpError) when (invalidOpError.Message.Contains("The character set provided in ContentType is invalid."))
    {
        // Invalid encoding? Sometimes webpages have incorrectly set their charset / content type.
        // See if we can just parse the thing using UTF-8.
        logger.LogWarning(invalidOpError, "Unable to parse using HTTP client due to invalid ContentType. Attempting to parse using UTF-8.");
        return await TryFetchWithForcedUtf8(url, acceptHeaders, cancelToken);
    }
    catch (Exception httpException)
    {
        // Any other failure of the regular client is retried once through the HTTP/2 client.
        logger.LogWarning(httpException, "Failed to fetch {url} using HTTP client. Falling back to HTTP/2 fetch.", url);
        return await TryFetchWithHttp2Client(url, acceptHeaders, cancelToken);
    }
}
471474
472- private async Task < Result < string > > TryFetchWithForcedUtf8 ( Uri url , params string [ ] acceptHeaders )
475+ private async Task < Result < string > > TryFetchWithForcedUtf8 ( Uri url , IEnumerable < string > acceptHeaders , CancellationToken cancelToken )
473476 {
474477 try
475478 {
476- using var httpRequest = new HttpRequestMessage ( HttpMethod . Get , url ) ;
477- if ( acceptHeaders != null )
478- {
479- foreach ( var header in acceptHeaders )
480- {
481- httpRequest . Headers . Accept . Add ( new System . Net . Http . Headers . MediaTypeWithQualityHeaderValue ( header ) ) ;
482- }
483- }
484-
485- var httpResponse = await http . SendAsync ( httpRequest ) ;
486- httpResponse . EnsureSuccessStatusCode ( ) ;
487- var contentBytes = await httpResponse . Content . ReadAsByteArrayAsync ( ) ;
488- var responseString = Encoding . UTF8 . GetString ( contentBytes ) ;
489- logger . LogInformation ( "Successfully parsed the HTML using forced UTF-8 mode" ) ;
490- return responseString ;
479+ var byteStream = await this . http . GetStreamAsync ( url , acceptHeaders , 1024 * 1024 * 2 , cancelToken ) ;
480+ using var memStream = new MemoryStream ( ) ;
481+ await byteStream . CopyToAsync ( memStream , cancelToken ) ;
482+ return Encoding . UTF8 . GetString ( memStream . ToArray ( ) ) ;
491483 }
492484 catch ( Exception error )
493485 {
@@ -496,7 +488,7 @@ private async Task<Result<string>> TryFetchWithForcedUtf8(Uri url, params string
496488 }
497489 }
498490
499- private async Task < Result < string > > TryFetchWithHttp2Client ( Uri url , params string [ ] acceptHeaders )
491+ private async Task < Result < string > > TryFetchWithHttp2Client ( Uri url , IEnumerable < string > acceptHeaders , CancellationToken cancelToken )
500492 {
501493 try
502494 {
0 commit comments