Skip to content

Commit 8ccf7e3

Browse files
committed
Add URL and string helpers, extend sitemap provider
Updated WebsiteDiscoveryProvider to support generating an llms.txt file and refactored sitemap page retrieval for improved caching and detail.
1 parent 6c29ef3 commit 8ccf7e3

File tree

5 files changed

+165
-17
lines changed

5 files changed

+165
-17
lines changed

src/Extensions/UrlExtensions.cs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
namespace XperienceCommunity.SEO.Extensions;
2+
3+
/// <summary>
/// Extension methods that turn page URLs or relative paths into trimmed or absolute URLs.
/// </summary>
public static class UrlExtensions
{
    /// <summary>
    /// Returns the page's relative path with any leading '~' characters removed.
    /// </summary>
    /// <param name="pageUrl">The page URL whose <c>RelativePath</c> is read.</param>
    /// <returns>The relative path without a leading '~'.</returns>
    public static string RelativePathTrimmed(this WebPageUrl pageUrl) => pageUrl.RelativePath.TrimStart('~');

    /// <summary>
    /// Builds an absolute URL for the page from the scheme, host and path base of the current request.
    /// </summary>
    /// <param name="pageUrl">The page URL; its trimmed relative path is appended as-is.</param>
    /// <param name="currentRequest">The request that supplies scheme, host and path base.</param>
    /// <returns>The absolute URL as a string.</returns>
    // NOTE(review): no separator is inserted here, so this assumes the trimmed path starts with '/' — confirm.
    public static string AbsoluteURL(this WebPageUrl pageUrl, HttpRequest currentRequest) =>
        $"{currentRequest.Scheme}://{currentRequest.Host}{currentRequest.PathBase}{pageUrl.RelativePathTrimmed()}";

    /// <summary>
    /// Builds an absolute URL for a relative path from the scheme, host and path base of the current request.
    /// </summary>
    /// <param name="relativeUrl">A relative path, with or without a leading "~/" or "/".</param>
    /// <param name="currentRequest">The request that supplies scheme, host and path base.</param>
    /// <returns>The absolute URL, with exactly one '/' between the path base and the relative path.</returns>
    public static string AbsoluteURL(this string relativeUrl, HttpRequest currentRequest) =>
        // Trim both '~' and '/' because a separator is always added here; trimming only '~'
        // turned "~/about" or "/about" into ".../​/about" (a double slash).
        $"{currentRequest.Scheme}://{currentRequest.Host}{currentRequest.PathBase}/{relativeUrl.TrimStart('~', '/')}";
}

src/GlobalUsing.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,7 @@
1313

1414
global using XperienceCommunity.SEO.Models;
1515
global using XperienceCommunity.SEO.Services;
16+
global using XperienceCommunity.SEO.Helpers;
17+
global using XperienceCommunity.SEO.Extensions;
18+
19+
global using System.Text;

src/Helpers/StringHelper.cs

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
using System.Text.RegularExpressions;
2+
3+
namespace XperienceCommunity.SEO.Helpers;
4+
5+
/// <summary>
/// String extension helpers for truncating, stripping HTML, slug-style replacement and Markdown escaping.
/// </summary>
public static class StringHelper
{
    // Matches one HTML tag. [^>]* (rather than .*?) also matches tags that span several lines.
    private static readonly Regex HtmlTagRegex = new("<[^>]*>", RegexOptions.Compiled);

    // Matches a run of one or more whitespace characters.
    private static readonly Regex WhitespaceRunRegex = new(@"\s+", RegexOptions.Compiled);

    // Matches a run of one or more characters that are not ASCII letters or digits.
    private static readonly Regex NonAlphanumericRunRegex = new(@"[^a-zA-Z0-9]+", RegexOptions.Compiled);

    // Matches a single Markdown special character. '-' is escaped so that "+-." is not read
    // as the range '+'..'.' (which would also match ',').
    private static readonly Regex MarkdownSpecialCharRegex = new(@"([\\`*_{}\[\]()#+\-.!])", RegexOptions.Compiled);

    /// <summary>
    /// Truncates a string to the specified maximum length and appends a truncation suffix if necessary.
    /// </summary>
    /// <param name="value">The text to truncate; null, empty or whitespace-only input yields an empty string.</param>
    /// <param name="maxLength">Maximum number of characters kept from <paramref name="value"/> (the suffix is not counted).</param>
    /// <param name="truncationSuffix">Text appended when the value was shortened.</param>
    /// <returns>
    /// An empty string for blank input, the unchanged value when it fits,
    /// otherwise the shortened value followed by <paramref name="truncationSuffix"/>.
    /// </returns>
    public static string? Truncate(this string? value, int maxLength, string truncationSuffix = "…")
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return "";
        }

        if (value.Length <= maxLength)
        {
            return value;
        }

        // Do not cut a surrogate pair in half: if the last kept char is a high surrogate, drop it too.
        int cut = maxLength;
        if (cut > 0 && char.IsHighSurrogate(value[cut - 1]))
        {
            cut--;
        }

        return value[..cut] + truncationSuffix;
    }

    /// <summary>
    /// Cleans HTML tags and extra whitespace from the input string.
    /// </summary>
    /// <param name="input">Text that may contain HTML tags.</param>
    /// <returns>
    /// The text with tags removed (including tags spanning several lines), whitespace runs collapsed
    /// to a single space and leading/trailing whitespace trimmed; an empty string for blank input.
    /// </returns>
    public static string CleanHtml(this string input)
    {
        if (string.IsNullOrWhiteSpace(input))
        {
            return string.Empty;
        }

        // Remove HTML tags if any are still present
        string noHtml = HtmlTagRegex.Replace(input, string.Empty);

        // Replace multiple whitespace characters with a single space
        string noExtraWhitespace = WhitespaceRunRegex.Replace(noHtml, " ");

        // Trim leading and trailing whitespace
        return noExtraWhitespace.Trim();
    }

    /// <summary>
    /// Replaces any special character (non-alphanumeric), whitespace, and underscores with hyphens.
    /// Multiple consecutive special characters are replaced with a single hyphen.
    /// Leading and trailing hyphens are removed.
    /// </summary>
    /// <param name="input">The input string to process</param>
    /// <returns>A string with special characters replaced by hyphens; null or empty input is returned unchanged.</returns>
    public static string ReplaceSpecialChars(this string input)
    {
        if (string.IsNullOrEmpty(input))
        {
            return input;
        }

        // Collapse each run of non-alphanumeric characters into one hyphen, then strip hyphens
        // at either end, as the summary promises.
        return NonAlphanumericRunRegex.Replace(input, "-").Trim('-');
    }

    /// <summary>
    /// Escapes Markdown special characters in the input string.
    /// </summary>
    /// <param name="text">The text to escape.</param>
    /// <returns>
    /// The text with each of <c>\ ` * _ { } [ ] ( ) # + - . !</c> prefixed by a backslash;
    /// null or empty input is returned unchanged.
    /// </returns>
    public static string EscapeMarkdown(this string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return text;
        }

        // "\\$1" is a backslash followed by the matched character.
        return MarkdownSpecialCharRegex.Replace(text, "\\$1");
    }
}

src/Services/IWebsiteDiscoveryProvider.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,5 @@ public interface IWebsiteDiscoveryProvider
44
{
55
public Task<List<SitemapNode>> GetSitemapPages();
66
public Task<ActionResult> GenerateSitemap();
7+
public Task<ActionResult> GenerateLlmsTxt();
78
}

src/Services/WebsiteDiscoveryProvider.cs

Lines changed: 63 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,14 @@ public class WebsiteDiscoveryProvider(
66
IProgressiveCache cache,
77
IWebsiteChannelContext website,
88
IContentQueryExecutor executor,
9-
IWebsiteDiscoveryOptions options) : IWebsiteDiscoveryProvider
9+
IWebsiteDiscoveryOptions options,
10+
IHttpContextAccessor httpContextAccessor) : IWebsiteDiscoveryProvider
1011
{
1112
private readonly IProgressiveCache cache = cache;
1213
private readonly IWebsiteChannelContext website = website;
1314
private readonly IContentQueryExecutor executor = executor;
1415
private readonly IWebsiteDiscoveryOptions options = ValidateOptions(options);
16+
private readonly IHttpContextAccessor httpContextAccessor = httpContextAccessor;
1517

1618
private static IWebsiteDiscoveryOptions ValidateOptions(IWebsiteDiscoveryOptions options)
1719
{
@@ -50,23 +52,51 @@ public async Task<ActionResult> GenerateSitemap()
5052
var sitemapItems = await GetSitemapPages();
5153
return new SitemapProvider().CreateSitemap(new SitemapModel(sitemapItems));
5254
}
53-
public async Task<List<SitemapNode>> GetSitemapPages() =>
55+
56+
/// <summary>
/// Builds an llms.txt document: a Markdown heading with the website channel name, a "Pages" section,
/// and one list item per sitemap page linking to it, followed by the page description when one exists.
/// </summary>
/// <returns>A <see cref="ContentResult"/> containing the document as UTF-8 plain text.</returns>
public async Task<ActionResult> GenerateLlmsTxt()
{
    var pages = await GetSitemapPagesWithDetails();
    var sb = new StringBuilder();

    // May be null when there is no HTTP context; URLs then stay relative.
    var currentRequest = httpContextAccessor.HttpContext?.Request;

    sb.AppendLine($"# {website.WebsiteChannelName}");
    sb.AppendLine();
    sb.AppendLine("## Pages");
    sb.AppendLine();

    foreach (var page in pages)
    {
        // Fall back to the page item name when no title is set.
        string title = !string.IsNullOrWhiteSpace(page.Title) ? page.Title : page.SystemFields.WebPageItemName;
        string relativeUrl = page.SystemFields.WebPageUrlPath;
        string url = currentRequest != null ? relativeUrl.AbsoluteURL(currentRequest) : relativeUrl;

        // ToLowerInvariant, not ToLower: URL casing must not depend on the server's current culture.
        string link = $"- [{ToListText(title)}]({url.ToLowerInvariant()})";

        sb.AppendLine(string.IsNullOrWhiteSpace(page.Description)
            ? link
            : $"{link}: {ToListText(page.Description)}");
    }

    return new ContentResult
    {
        Content = sb.ToString(),
        ContentType = "text/plain; charset=utf-8"
    };

    // Strips HTML, escapes Markdown and removes the "’s" suffix, exactly as applied to both title and description.
    static string ToListText(string text) => text.CleanHtml().EscapeMarkdown().Replace("’s", "");
}
89+
90+
private async Task<List<SitemapPage>> GetSitemapPagesWithDetails() =>
5491
await cache.LoadAsync(cs =>
5592
{
5693
cs.CacheDependency = CacheHelper.GetCacheDependency(BuildCacheDependencyKeys());
5794

58-
return GetSitemapNodesInternal();
59-
}, new CacheSettings(3, [nameof(GetSitemapPages)]) { });
95+
return GetSitemapPagesInternal();
96+
}, new CacheSettings(60, [nameof(GetSitemapPagesWithDetails)]) { });
6097

61-
private string[] BuildCacheDependencyKeys() =>
62-
options.ContentTypeDependencies
63-
.Select(t => $"webpageitem|bychannel|{website.WebsiteChannelName}|bycontenttype|{t}")
64-
.ToArray();
65-
66-
private async Task<List<SitemapNode>> GetSitemapNodesInternal()
98+
private async Task<List<SitemapPage>> GetSitemapPagesInternal()
6799
{
68-
var nodes = new List<SitemapNode>();
69-
70100
var b = new ContentItemQueryBuilder()
71101
.ForContentTypes(c => c
72102
.OfReusableSchema(options.ReusableSchemaName)
@@ -104,13 +134,29 @@ private async Task<List<SitemapNode>> GetSitemapNodesInternal()
104134
}, isInSitemap, title, description);
105135
});
106136

107-
foreach (var page in pages)
137+
return pages.Where(p => p.IsInSitemap).ToList();
138+
}
139+
140+
public async Task<List<SitemapNode>> GetSitemapPages() =>
141+
await cache.LoadAsync(cs =>
108142
{
109-
if (!page.IsInSitemap)
110-
{
111-
continue;
112-
}
143+
cs.CacheDependency = CacheHelper.GetCacheDependency(BuildCacheDependencyKeys());
144+
145+
return GetSitemapNodesInternal();
146+
}, new CacheSettings(60, [nameof(GetSitemapPages)]) { });
147+
148+
/// <summary>
/// Builds one cache dependency key per configured content type, scoped to the current website channel.
/// </summary>
/// <returns>Keys of the form <c>webpageitem|bychannel|{channel}|bycontenttype|{contentType}</c>.</returns>
// NOTE(review): the key must match the system dummy-key format exactly. Inserting an extra segment
// (such as the provider class name) between "bycontenttype" and the content type presumably yields a
// key the system never touches, so cached sitemap data would not be invalidated — confirm against the
// Xperience cache dependency reference.
private string[] BuildCacheDependencyKeys() =>
    options.ContentTypeDependencies
        .Select(t => $"webpageitem|bychannel|{website.WebsiteChannelName}|bycontenttype|{t}")
        .ToArray();
113152

153+
private async Task<List<SitemapNode>> GetSitemapNodesInternal()
154+
{
155+
var pages = await GetSitemapPagesInternal();
156+
var nodes = new List<SitemapNode>();
157+
158+
foreach (var page in pages)
159+
{
114160
var node = new SitemapNode(page.SystemFields.WebPageUrlPath)
115161
{
116162
LastModificationDate = DateTime.Now,

0 commit comments

Comments
 (0)