Skip to content

Commit 5663897

Browse files
committed
Put escaped text into content directly, skip markdown conversion step
1 parent d1dd883 commit 5663897

File tree

4 files changed

+135
-12
lines changed

4 files changed

+135
-12
lines changed

src/Fetcher.js

+7-1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ const xmlParser = new XMLParser({
1818
ignoreAttributes: false,
1919
allowBooleanAttributes: true,
2020
parseAttributeValue: true,
21+
processEntities: false, // disable this, was causing inconsistencies in Bluesky entries
22+
// htmlEntities: true,
2123
});
2224

2325
class Fetcher {
@@ -58,6 +60,10 @@ class Fetcher {
5860
return base64Hash.replace(/[^A-Z0-9]/gi, "").slice(0, HASH_FILENAME_MAXLENGTH);
5961
}
6062

63+
static parseXml(content) {
64+
return xmlParser.parse(content);
65+
}
66+
6167
#cacheDuration = "0s";
6268
#directoryManager;
6369
#assetsFolder = "assets";
@@ -249,7 +255,7 @@ class Fetcher {
249255

250256
return EleventyFetch(url, opts).then(result => {
251257
if(opts.type === "xml") {
252-
return xmlParser.parse(result);
258+
return Fetcher.parseXml(result);
253259
}
254260

255261
return result;

src/Importer.js

+33-7
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import fs from "graceful-fs";
44
import yaml from "js-yaml";
55
import kleur from "kleur";
66
import slugify from '@sindresorhus/slugify';
7+
import * as entities from "entities";
78

89
import { Logger } from "./Logger.js";
910
import { Fetcher } from "./Fetcher.js";
@@ -204,13 +205,27 @@ class Importer {
204205
}
205206

206207
addDataOverride(type, url, data) {
208+
let found = false;
207209
for(let source of this.getSourcesForType(type)) {
208210
source.setDataOverride(url, data);
211+
found = true;
209212
}
213+
214+
if(!found) {
215+
throw new Error("addDataOverride(type) not found: " + type)
216+
}
217+
}
218+
219+
static shouldUseMarkdownFileExtension(entry) {
220+
return this.isText(entry) || this.isHtml(entry);
210221
}
211222

212223
static shouldConvertToMarkdown(entry) {
213-
return this.isHtml(entry) || entry.contentType === "text";
224+
return this.isHtml(entry);
225+
}
226+
227+
static isText(entry) {
228+
return entry.contentType === "text";
214229
}
215230

216231
static isHtml(entry) {
@@ -235,11 +250,13 @@ class Importer {
235250
}
236251

237252
async getEntries(options = {}) {
253+
let isWritingToMarkdown = options.contentType === "markdown";
254+
238255
let entries = [];
239256
for(let source of this.sources) {
240257
for(let entry of await source.getEntries()) {
241258
let contentType = entry.contentType;
242-
if(Importer.shouldConvertToMarkdown(entry) && options.contentType === "markdown") {
259+
if(Importer.shouldUseMarkdownFileExtension(entry) && isWritingToMarkdown) {
243260
contentType = "markdown";
244261
}
245262

@@ -257,14 +274,23 @@ class Importer {
257274
await this.fetchRelatedMedia(entry);
258275

259276
if(Importer.isHtml(entry)) {
260-
entry.content = await this.htmlTransformer.transform(entry.content, entry);
277+
let decodedHtml = entities.decodeHTML(entry.content);
278+
entry.content = await this.htmlTransformer.transform(decodedHtml, entry);
261279
}
262-
if(Importer.shouldConvertToMarkdown(entry) && options.contentType === "markdown") {
263-
await this.markdownService.asyncInit();
264280

265-
entry.content = await this.markdownService.toMarkdown(entry.content, entry);
281+
if(isWritingToMarkdown) {
282+
if(Importer.isText(entry)) {
283+
// _only_ decode newlines
284+
entry.content = entry.content.split("
").join("\n");
285+
}
286+
287+
if(Importer.shouldConvertToMarkdown(entry)) {
288+
await this.markdownService.asyncInit();
266289

267-
entry.contentType = "markdown";
290+
entry.content = await this.markdownService.toMarkdown(entry.content, entry);
291+
292+
entry.contentType = "markdown";
293+
}
268294
}
269295

270296
return entry;

test/sources/bluesky-test.xml

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<rss version="2.0"><channel><description>🌍 https://zachleat.com&#xA;🎈🐀 Creator/Maintainer of @11ty.dev&#xA;🎉 Builder at Font Awesome&#xA;🏳️‍⚧️ Listen to Trans Folks&#xA;👋🏻 He/him/they&#xA;🐘 Mastodon https://zachleat.com/@zachleat&#xA;✅ Front of the Front-end&#xA;✅ Static Sites&#xA;✅ Web Components&#xA;✅ Web Performance</description><link>https://bsky.app/profile/zachleat.com</link><title>@zachleat.com - Zach Leatherman</title><item><link>https://bsky.app/profile/zachleat.com/post/3lckusgtkuk2r</link><description>time to review my HTML wrapped 2024&#xA;&#xA;Most used: &lt;a&gt;&#xA;Doing work to reduce infrastructure bills: &lt;picture&gt;&#xA;Underrated: &lt;output&gt;&#xA;Misunderstood: &lt;details&gt;&#xA;Tame but a small win: &lt;search&gt;&#xA;Hope the design never calls for it: &lt;dialog&gt;&#xA;Not today Satan: &lt;canvas&gt;&#xA;Pure vibes: &lt;noscript&gt;</description><pubDate>05 Dec 2024 14:26 +0000</pubDate><guid isPermaLink="false">at://did:plc:xpchjovbk6sxl3bv74z7cs54/app.bsky.feed.post/3lckusgtkuk2r</guid></item></channel></rss>

test/test.js

+93-4
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@ import { DataSource } from "../src/DataSource.js";
99
import { Persist } from "../src/Persist.js";
1010
import { Fetcher } from "../src/Fetcher.js";
1111

12+
function cleanContent(content) {
13+
// trim extra whitespace (dirty workaround for trailing whitespace)
14+
return content.split("\n").map(line => line.trim()).join("\n");
15+
}
16+
1217
const require = createRequire(import.meta.url);
1318

1419
test("YouTube user", async (t) => {
@@ -20,17 +25,62 @@ test("YouTube user", async (t) => {
2025
importer.addSource("youtubeUser", "UCskGTioqrMBcw8pd14_334A");
2126

2227
let stubContent = fs.readFileSync("./test/sources/youtube-user.xml");
23-
importer.addDataOverride("wordpress", "https://www.youtube.com/feeds/videos.xml?channel_id=UCskGTioqrMBcw8pd14_334A", stubContent);
28+
importer.addDataOverride("youtube", "https://www.youtube.com/feeds/videos.xml?channel_id=UCskGTioqrMBcw8pd14_334A", Fetcher.parseXml(stubContent.toString("utf8")));
2429

25-
let entries = await importer.getEntries();
30+
let entries = await importer.getEntries({ contentType: "markdown" });
2631
assert.equal(entries.length, 15);
2732

2833
let [post] = entries;
34+
2935
assert.deepEqual(Object.keys(post).sort(), ["authors", "content", "contentType", "date", "dateUpdated", "filePath", "title", "type", "url", "uuid"]);
3036
assert.equal(post.content.length, 812);
37+
assert.equal(post.content, `CloudCannon is the Recommended CMS Partner of 11ty:
38+
39+
https://cloudcannon.com/11tyconf/
40+
https://cloudcannon.com/blog/how-to-manage-hundreds-of-connected-websites-with-a-git-based-headless-cms/
41+
42+
This was a talk given at the 11ty International Symposium on Making Web Sites Real Good (2024): https://conf.11ty.dev/2024/managing-content-management/
43+
44+
If Jamstack has taught us anything, it’s that websites work best when they’re generated from folders full of flat files. Even massively interconnected websites!
45+
46+
We talk through a classically Jamstacky approach to content management for large organizations: mounting shared layout and component repositories, creating a central content lake to aggregate content like news articles, and automating site builds and deployments when your content or dependencies change.`);
47+
3148
assert.equal(post.authors[0].name, "Eleventy");
3249
});
3350

51+
test("Bluesky posts", async (t) => {
52+
let importer = new Importer();
53+
54+
importer.setVerbose(false);
55+
importer.setDryRun(true);
56+
57+
importer.addSource("bluesky", "zachleat.com");
58+
59+
let stubContent = fs.readFileSync("./test/sources/bluesky-test.xml");
60+
61+
importer.addDataOverride("bluesky", "https://bsky.app/profile/zachleat.com/rss", Fetcher.parseXml(stubContent.toString("utf8")));
62+
63+
let entries = await importer.getEntries({ contentType: "markdown" });
64+
assert.equal(entries.length, 1);
65+
66+
let [post] = entries;
67+
68+
assert.deepEqual(Object.keys(post).sort(), ["authors", "content", "contentType", "date", "filePath", "title", "type", "url", "uuid"]);
69+
assert.equal(post.content.length, 323);
70+
assert.equal(post.content, `time to review my HTML wrapped 2024
71+
72+
Most used: &lt;a&gt;
73+
Doing work to reduce infrastructure bills: &lt;picture&gt;
74+
Underrated: &lt;output&gt;
75+
Misunderstood: &lt;details&gt;
76+
Tame but a small win: &lt;search&gt;
77+
Hope the design never calls for it: &lt;dialog&gt;
78+
Not today Satan: &lt;canvas&gt;
79+
Pure vibes: &lt;noscript&gt;`);
80+
81+
assert.equal(post.authors[0].name, "@zachleat.com - Zach Leatherman");
82+
});
83+
3484
test("WordPress import", async (t) => {
3585

3686
let importer = new Importer();
@@ -51,12 +101,51 @@ test("WordPress import", async (t) => {
51101
importer.addDataOverride("wordpress", "https://blog.fontawesome.com/wp-json/wp/v2/categories/1", require("./sources/blog-awesome-categories.json"));
52102
importer.addDataOverride("wordpress", "https://blog.fontawesome.com/wp-json/wp/v2/users/155431370", require("./sources/blog-awesome-author.json"));
53103

54-
let entries = await importer.getEntries();
104+
let entries = await importer.getEntries({ contentType: "markdown" });
55105
assert.equal(entries.length, 1);
56106

57107
let [post] = entries;
58108
assert.deepEqual(Object.keys(post).sort(), ["authors", "content", "contentType", "date", "dateUpdated", "filePath", "metadata", "status", "title", "type", "url", "uuid"]);
59-
assert.equal(post.content.length, 6134);
109+
110+
assert.equal(cleanContent(post.content), `We’re so close to launching version 6, and we figured it was high time to make an official announcement. So, save the date for February. Font Awesome 6 will go beyond pure icon-imagination!
111+
112+
![](assets/image-calendar-exclamation-2-eKNZqhhuChge.png)
113+
114+
Save the date! February 2022 is just around the corner!
115+
116+
So, what’s new?
117+
118+
* * *
119+
120+
## More Icons
121+
122+
Font Awesome 6 contains over 7,000 new icons, so you’re sure to find what you need for your project. Plus, we’ve redesigned most of our icons from scratch, so they’re more consistent and easier to use.
123+
124+
![](assets/image-icons-2-66KjmgCOuZQw.png)
125+
126+
* * *
127+
128+
## More Styles
129+
130+
Font Awesome 6 includes five icons styles: solid, regular, light, duotone, and the new THIN style — not to mention all of our brand icons. And coming later in 2022 is the entirely new SHARP family of styles.
131+
132+
![](assets/image-styles-2-SNjQOsXaJuRQ.png)
133+
134+
* * *
135+
136+
## More Ways to Use
137+
138+
Font Awesome 6 makes it even easier to use icons where you want to. More plugins and packages to match your stack. Less time wrestling browser rendering.
139+
140+
![](assets/image-awesome-2-1AOLfzrlbkMJ.png)
141+
142+
* * *
143+
144+
We’ll keep fine-tuning that sweet, sweet recipe until February. Believe us; the web’s going to have a new scrumpdillyicious secret ingredient!
145+
146+
[Check Out the Beta!](https://fontawesome.com/v6.0)`);
147+
148+
assert.equal(post.content.length, 1304);
60149
assert.equal(post.authors[0].name, "Matt Johnson");
61150
});
62151

0 commit comments

Comments
 (0)