Skip to content

Commit a6047a1

Browse files
committed
feat: #1600 提高超大文档的性能
1 parent 3110c04 commit a6047a1

File tree

6 files changed

+79
-11
lines changed

6 files changed

+79
-11
lines changed

.changeset/little-cougars-wink.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
'cherry-markdown': patch
3+
---
4+
5+
feat: #1600 提高超大文档的性能

examples/assets/scripts/index-demo.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -494,7 +494,7 @@ const basicConfig = {
494494
editor: {
495495
id: 'cherry-text',
496496
name: 'cherry-text',
497-
autoSave2Textarea: true,
497+
autoSave2Textarea: false,
498498
defaultModel: 'edit&preview',
499499
showFullWidthMark: true, // 是否高亮全角符号 ·|¥|、|:|“|”|【|】|(|)|《|》
500500
showSuggestList: true, // 是否显示联想框

packages/cherry-markdown/src/Editor.js

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -164,9 +164,19 @@ export default class Editor {
164164
const end = { line: targetLine, ch: targetChTo };
165165
return { bigString, begin, end, id };
166166
});
167+
168+
/**
169+
* 如果编辑器行数超过10000,则不再处理
170+
* 增加这个逻辑是为了避免性能问题,当超过1w行时,formatBigData2Mark耗费的性能会明显增加。后续在优化后可以去掉这个降级逻辑
171+
* 允许降级的理由:超过1w行的md基本已经不关心base64等数据是否缩略展示了
172+
*/
173+
if (this.editor.lineCount() > 10000) {
174+
return;
175+
}
167176
this.formatBigData2Mark(base64Reg, 'cm-url base64');
168177
this.formatBigData2Mark(imgDrawioXmlReg, 'cm-url drawio');
169-
this.formatBigData2Mark(longTextReg, 'cm-url long-text');
178+
// 长文本替换的正则性能太差,先注释掉
179+
// this.formatBigData2Mark(longTextReg, 'cm-url long-text');
170180
if (this.$cherry.options.editor.maxUrlLength > 10) {
171181
const [protocolUrlPattern, wwwUrlPattern] = createUrlReg(this.$cherry.options.editor.maxUrlLength);
172182
this.formatBigData2Mark(protocolUrlPattern, 'cm-url url-truncated');
@@ -612,7 +622,14 @@ export default class Editor {
612622
this.onCursorActivity();
613623
});
614624
editor.on('beforeChange', (codemirror) => {
615-
this.selectAll = this.editor.getValue() === codemirror.getSelection();
625+
// 判断是否是全选
626+
const { line: toLine, ch: toCh } = this.editor.getCursor('to');
627+
const { line: fromLine, ch: fromCh } = this.editor.getCursor('from');
628+
this.selectAll =
629+
fromLine === 0 &&
630+
fromCh === 0 &&
631+
toLine === this.editor.lineCount() - 1 &&
632+
toCh === this.editor.getLine(toLine).length;
616633
});
617634

618635
addEvent(

packages/cherry-markdown/src/Engine.js

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,13 @@ export default class Engine {
301301
canContinue = false;
302302
}
303303
}
304-
return oneHook[action](newMd, actionArgs, this.markdownParams);
304+
// const time = Date.now();
305+
const ret = oneHook[action](newMd, actionArgs, this.markdownParams);
306+
// const cost = Date.now() - time;
307+
// if (cost > 50) {
308+
// console.log(`hook ${oneHook.getName()} ${action} cost ${Date.now() - time}ms`);
309+
// }
310+
return ret;
305311
}, $md);
306312
} catch (e) {
307313
throw new NestedError(e);
@@ -316,13 +322,17 @@ export default class Engine {
316322
return this.hash(str);
317323
}
318324

325+
/**
 * Computes a SHA-256 digest of the given string via CryptoJS.
 * @param {String} str the string to digest
 * @returns {String} the result of `CryptoJS.SHA256(str).toString()`
 *   (presumably hex-encoded, the CryptoJS default — TODO confirm)
 */
sha256(str) {
326+
return CryptoJS.SHA256(str).toString();
327+
}
328+
319329
/**
320330
* 计算哈希值
321331
* @param {String} str 被计算的字符串
322332
* @returns {String} 哈希值
323333
*/
324334
hash(str) {
325-
// 当缓存队列比较大时,随机抛弃500个缓存
335+
// 当缓存队列超过2000条时,按插入顺序抛弃最早的200个缓存(并非随机)
326336
if (this.hashStrMap.size > 2000) {
327337
const keys = Array.from(this.hashStrMap.keys()).slice(0, 200);
328338
keys.forEach((key) => this.hashStrMap.delete(key));
@@ -366,11 +376,18 @@ export default class Engine {
366376
this.cachedBigData[cacheKey] = m2;
367377
return `${m1}${cacheKey}}`;
368378
});
369-
$md = $md.replace(longTextReg, (whole, m1, m2) => {
370-
const cacheKey = `bigDataBegin${this.hash(m2)}bigDataEnd`;
371-
this.cachedBigData[cacheKey] = m2;
372-
return `${m1}${cacheKey}}`;
373-
});
379+
380+
const tmpArr = $md.split(/\n/);
381+
for (let i = 0; i < tmpArr.length; i++) {
382+
if (tmpArr[i].length > 6000) {
383+
tmpArr[i] = tmpArr[i].replace(longTextReg, (whole) => {
384+
const cacheKey = `bigDataBegin${this.hash(whole)}bigDataEnd`;
385+
this.cachedBigData[cacheKey] = whole;
386+
return cacheKey;
387+
});
388+
}
389+
}
390+
$md = tmpArr.join('\n');
374391
$md = $md.replace(pasteWrapperReg, '');
375392
return $md;
376393
}

packages/cherry-markdown/src/core/hooks/HtmlBlock.js

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ export default class HtmlBlock extends ParagraphBase {
4545
super({ needCache: true });
4646
this.filterStyle = config.filterStyle || false;
4747
this.removeTrailingNewline = config.removeTrailingNewline || false;
48+
this.cacheData = {};
49+
this.cacheDataMap = [];
4850
}
4951

5052
// ref: http://www.vfmd.org/vfmd-spec/specification/#procedure-for-detecting-automatic-links
@@ -218,6 +220,33 @@ export default class HtmlBlock extends ParagraphBase {
218220
}
219221
config.HTML_INTEGRATION_POINTS.foreignobject = true;
220222

223+
const $strArr = $str.split('\n');
224+
// 如果内容很大,则分批处理,用空间换sanitizer.sanitize消耗的时间
225+
const batch = 100;
226+
// 最大缓存容量(冗余20%)
227+
const maxCacheLength = Math.round((1.2 * $strArr.length) / batch);
228+
if ($strArr.length > batch) {
229+
const ret = [];
230+
for (let i = 0; i < $strArr.length; i += batch) {
231+
const batchStr = $strArr.slice(i, i + batch).join('\n');
232+
if (!this.cacheData[batchStr]) {
233+
/**
234+
* 缓存太多时,清除最早插入的10条缓存(splice(0, 10) 移除的是队列头部)
235+
* - 注意:这里移除的是最早插入的条目,而不是“最近的”;流式输出场景下的淘汰策略需结合这一点确认
236+
*/
237+
if (this.cacheDataMap.length > maxCacheLength) {
238+
const removed = this.cacheDataMap.splice(0, 10);
239+
removed.forEach((key) => {
240+
delete this.cacheData[key];
241+
});
242+
}
243+
this.cacheData[batchStr] = sanitizer.sanitize(batchStr, config);
244+
this.cacheDataMap.push(batchStr);
245+
}
246+
ret.push(this.cacheData[batchStr]);
247+
}
248+
return ret.join('\n');
249+
}
221250
return sanitizer.sanitize($str, config);
222251
}
223252
}

packages/cherry-markdown/src/utils/regexp.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,7 @@ export const imgBase64Reg = /(\[[^\n]*?\]\(data:image\/[a-z]{1,10};base64,)([^)]
268268
export const base64Reg = /(data:image\/[a-z]{1,10};base64,)([0-9a-zA-Z+/=]+)/g;
269269

270270
// 匹配单行内连续 6000 个及以上、且不含换行、空白、`|`、反引号的字符;排除 `|` 是为了避免误匹配表格
271-
export const longTextReg = /([^\n]{100})([^\n|`\s]{5900,})/g;
271+
export const longTextReg = /[^\n|`\s]{6000,}/g;
272272

273273
/**
274274
* 创建匹配markdown中URL链接的正则表达式

0 commit comments

Comments
 (0)