Skip to content

Commit 1867fff

Browse files
committed
Normative: add RegExp.escape (#3382)
1 parent f2b2d52 commit 1867fff

11 files changed

+173
-7
lines changed

.github/workflows/build.yml

+3-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ jobs:
1010
steps:
1111
- uses: actions/checkout@v3
1212
- uses: ljharb/actions/node/install@d9f477827ed71a259056764107f74afc29febcae
13-
name: 'nvm install lts/* && npm ci'
13+
name: 'nvm install lts/* && npm ci --no-audit'
14+
env:
15+
NPM_CONFIG_AUDIT: false
1416
with:
1517
node-version: lts/*
1618
use-npm-ci: true

.github/workflows/deploy.yml

+3-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@ jobs:
1414
steps:
1515
- uses: actions/checkout@v3
1616
- uses: ljharb/actions/node/install@d9f477827ed71a259056764107f74afc29febcae
17-
name: 'nvm install lts/* && npm ci'
17+
name: 'nvm install lts/* && npm ci --no-audit'
18+
env:
19+
NPM_CONFIG_AUDIT: false
1820
with:
1921
node-version: lts/*
2022
use-npm-ci: true

.github/workflows/enforce-format.yml

+3-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ jobs:
1010
steps:
1111
- uses: actions/checkout@v3
1212
- uses: ljharb/actions/node/install@d9f477827ed71a259056764107f74afc29febcae
13-
name: 'nvm install lts/* && npm ci'
13+
name: 'nvm install lts/* && npm ci --no-audit'
14+
env:
15+
NPM_CONFIG_AUDIT: false
1416
with:
1517
node-version: lts/*
1618
use-npm-ci: true

.github/workflows/ipr.yml

+3-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@ jobs:
1616
steps:
1717
- uses: actions/checkout@v3
1818
- uses: ljharb/actions/node/install@d9f477827ed71a259056764107f74afc29febcae
19-
name: 'nvm install lts/* && npm ci'
19+
name: 'nvm install lts/* && npm ci --no-audit'
20+
env:
21+
NPM_CONFIG_AUDIT: false
2022
with:
2123
node-version: lts/*
2224
use-npm-ci: true

.github/workflows/preview-build.yml

+3-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ jobs:
1010
steps:
1111
- uses: actions/checkout@v3
1212
- uses: ljharb/actions/node/install@d9f477827ed71a259056764107f74afc29febcae
13-
name: 'nvm install lts/* && npm ci'
13+
name: 'nvm install lts/* && npm ci --no-audit'
14+
env:
15+
NPM_CONFIG_AUDIT: false
1416
with:
1517
node-version: lts/*
1618
use-npm-ci: true

.github/workflows/preview.yml

+3-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@ jobs:
2020
steps:
2121
- uses: actions/checkout@v3
2222
- uses: ljharb/actions/node/install@d9f477827ed71a259056764107f74afc29febcae
23-
name: 'nvm install lts/* && npm ci'
23+
name: 'nvm install lts/* && npm ci --no-audit'
24+
env:
25+
NPM_CONFIG_AUDIT: false
2426
with:
2527
node-version: lts/*
2628
use-npm-ci: true

.github/workflows/publish-biblio.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ jobs:
2424
registry-url: 'https://registry.npmjs.org'
2525

2626
- name: Install dependencies
27-
run: npm ci
27+
run: npm ci --no-audit
2828

2929
- name: Publish biblio
3030
run: scripts/publish-biblio.sh

.github/workflows/spellcheck.yml

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
name: 'ecma-262'
2+
3+
on: [pull_request]
4+
5+
jobs:
6+
spellcheck:
7+
name: 'check for newly-introduced spelling errors'
8+
runs-on: ubuntu-latest
9+
10+
steps:
11+
- run: sudo apt-get install aspell
12+
- uses: actions/checkout@v3
13+
with:
14+
# Number of commits to fetch. 0 indicates all history for all branches and tags.
15+
# Default: 1
16+
fetch-depth: 0
17+
- uses: ljharb/actions/node/install@d9f477827ed71a259056764107f74afc29febcae
18+
name: 'nvm install lts/* && npm ci --no-audit'
19+
env:
20+
NPM_CONFIG_AUDIT: false
21+
with:
22+
node-version: lts/*
23+
use-npm-ci: true
24+
- run: node scripts/spellcheck.mjs origin/"${GITHUB_BASE_REF}"

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ node_modules/
22
out/
33
vendor/esmeta
44
test*.js
5+
aspell.txt
56

67
# lockfiles we don't use are ignored
78
npm-shrinkwrap.json

scripts/spellcheck.mjs

+72
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import { promisify } from 'node:util';
2+
import { exec } from 'node:child_process';
3+
const execP = promisify(exec);
4+
import { writeFile } from 'node:fs/promises';
5+
6+
const MIN_WORD_SIZE = 3;
7+
8+
const BASE_REF = process.argv[2];
9+
const ASPELL_OPTS = [
10+
'--add-html-check=alt,title,caption,variants',
11+
'--ignore-case',
12+
'--master=en_GB-ize',
13+
'--mode=html',
14+
'--run-together',
15+
'--run-together-limit=99',
16+
'--run-together-min=2',
17+
'list',
18+
].join(' ');
19+
20+
function makeDict(words) {
21+
return `personal_ws-1.1 en ${words.length}\n${words.join('\n')}`;
22+
}
23+
24+
function lines(text) {
25+
if (text.length === 0) return [];
26+
return text.split('\n');
27+
}
28+
29+
console.log(`base ref: ${BASE_REF}`);
30+
31+
let { stdout } = await execP(`git show "${BASE_REF}":spec.html | aspell ${ASPELL_OPTS} | sort -fu`);
32+
33+
let existingWords = lines(stdout.trim());
34+
35+
let existingComponents = Array.from(new Set(
36+
existingWords
37+
.flatMap(word => [...word.matchAll(/(?:^[a-z]|[A-Z])[a-z]{2,}/g)])
38+
.map(([w]) => w.toLowerCase())
39+
));
40+
41+
({ stdout } = await execP(`echo ${existingComponents.map(w => JSON.stringify(w)).join(' ')} | aspell ${ASPELL_OPTS} | sort -fu`));
42+
43+
let existingComponentsReduced = lines(stdout.trim());
44+
45+
await writeFile('aspell.txt', makeDict(existingComponentsReduced));
46+
47+
({ stdout } = await execP(`echo ${existingWords.map(w => JSON.stringify(w)).join(' ')} | aspell --personal=./aspell.txt ${ASPELL_OPTS}`));
48+
49+
let novel = [...existingComponentsReduced, ...lines(stdout.trim())].filter(w => w.length >= MIN_WORD_SIZE);
50+
novel.sort();
51+
console.log(`\npreviously used novel words: ${novel.join(', ')}`);
52+
await writeFile('aspell.txt', makeDict(novel));
53+
54+
({ stdout } = await execP(`aspell --personal=./aspell.txt ${ASPELL_OPTS} list <spec.html | sort -u`));
55+
let misspellings = lines(stdout.trim()).filter(w => w.length >= MIN_WORD_SIZE);
56+
57+
if (misspellings.length > 0) {
58+
console.log(`\nmisspellings: ${misspellings.join(', ')}`);
59+
let pattern = misspellings.map(w => `-e ${JSON.stringify(w)}`).join(' --or ');
60+
({ stdout } = await execP(`git grep --line-number --column --fixed-strings --only-matching ${pattern} -- spec.html`));
61+
62+
console.log('');
63+
64+
let info = lines(stdout.trim());
65+
for (let warning of info) {
66+
let [match, file, line, col, typo] = warning.match(/^([^:]+):(\d+):(\d+):(.*)$/);
67+
let title = 'Potential Typo';
68+
let message = `${JSON.stringify(typo)} is not a previously used word or composed of previously used words. Perhaps it is a typo?`;
69+
// https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-warning-message
70+
// `col` is captured by the regex as a string, so convert it before adding; otherwise `+` concatenates and endColumn is wrong (e.g. "12" + 5 gives "125").
console.log(`::warning file=${file},line=${line},endLine=${line},col=${col},endColumn=${Number(col) + typo.length},title=${title}::${message}`);
71+
}
72+
}

spec.html

+57
Original file line numberDiff line numberDiff line change
@@ -37827,6 +37827,63 @@ <h1>Properties of the RegExp Constructor</h1>
3782737827
<li>has the following properties:</li>
3782837828
</ul>
3782937829

37830+
<emu-clause id="sec-regexp.escape">
37831+
<h1>RegExp.escape ( _S_ )</h1>
37832+
<p>This function returns a copy of _S_ in which characters that are potentially special in a regular expression |Pattern| have been replaced by equivalent escape sequences.</p>
37833+
<p>It performs the following steps when called:</p>
37834+
37835+
<emu-alg>
37836+
1. If _S_ is not a String, throw a *TypeError* exception.
37837+
1. Let _escaped_ be the empty String.
37838+
1. Let _cpList_ be StringToCodePoints(_S_).
37839+
1. For each code point _c_ of _cpList_, do
37840+
1. If _escaped_ is the empty String and _c_ is matched by either |DecimalDigit| or |AsciiLetter|, then
37841+
1. NOTE: Escaping a leading digit ensures that output corresponds with pattern text which may be used after a `\0` character escape or a |DecimalEscape| such as `\1` and still match _S_ rather than be interpreted as an extension of the preceding escape sequence. Escaping a leading ASCII letter does the same for the context after `\c`.
37842+
1. Let _numericValue_ be the numeric value of _c_.
37843+
1. Let _hex_ be Number::toString(𝔽(_numericValue_), 16).
37844+
1. Assert: The length of _hex_ is 2.
37845+
1. Set _escaped_ to the string-concatenation of the code unit 0x005C (REVERSE SOLIDUS), *"x"*, and _hex_.
37846+
1. Else,
37847+
1. Set _escaped_ to the string-concatenation of _escaped_ and EncodeForRegExpEscape(_c_).
37848+
1. Return _escaped_.
37849+
</emu-alg>
37850+
37851+
<emu-note>
37852+
<p>Despite having similar names, EscapeRegExpPattern and `RegExp.escape` do not perform similar actions. The former escapes a pattern for representation as a string, while this function escapes a string for representation inside a pattern.</p>
37853+
</emu-note>
37854+
37855+
<emu-clause id="sec-encodeforregexpescape" type="abstract operation">
37856+
<h1>
37857+
EncodeForRegExpEscape (
37858+
_c_: a code point,
37859+
): a String
37860+
</h1>
37861+
<dl class="header">
37862+
<dt>description</dt>
37863+
<dd>It returns a string representing a |Pattern| for matching _c_. If _c_ is white space or an ASCII punctuator, the returned value is an escape sequence. Otherwise, the returned value is a string representation of _c_ itself.</dd>
37864+
</dl>
37865+
37866+
<emu-alg>
37867+
1. If _c_ is matched by |SyntaxCharacter| or _c_ is U+002F (SOLIDUS), then
37868+
1. Return the string-concatenation of the code unit 0x005C (REVERSE SOLIDUS) and UTF16EncodeCodePoint(_c_).
37869+
1. Else if _c_ is a code point listed in some cell of the “Code Point” column of <emu-xref href="#table-controlescape-code-point-values"></emu-xref>, then
37870+
1. Return the string-concatenation of the code unit 0x005C (REVERSE SOLIDUS) and the string in the “ControlEscape” column of the row whose “Code Point” column contains _c_.
37871+
1. Let _otherPunctuators_ be the string-concatenation of *",-=&lt;>#&amp;!%:;@~'`"* and the code unit 0x0022 (QUOTATION MARK).
37872+
1. Let _toEscape_ be StringToCodePoints(_otherPunctuators_).
37873+
1. If _toEscape_ contains _c_, _c_ is matched by |WhiteSpace| or |LineTerminator|, or _c_ has the same numeric value as a leading surrogate or trailing surrogate, then
37874+
1. If the numeric value of _c_ ≤ 0xFF, then
37875+
1. Let _hex_ be Number::toString(𝔽(the numeric value of _c_), 16).
37876+
1. Return the string-concatenation of the code unit 0x005C (REVERSE SOLIDUS), *"x"*, and StringPad(_hex_, 2, *"0"*, ~start~).
37877+
1. Let _escaped_ be the empty String.
37878+
1. Let _codeUnits_ be UTF16EncodeCodePoint(_c_).
37879+
1. For each code unit _cu_ of _codeUnits_, do
37880+
1. Set _escaped_ to the string-concatenation of _escaped_ and UnicodeEscape(_cu_).
37881+
1. Return _escaped_.
37882+
1. Return UTF16EncodeCodePoint(_c_).
37883+
</emu-alg>
37884+
</emu-clause>
37885+
</emu-clause>
37886+
3783037887
<emu-clause id="sec-regexp.prototype">
3783137888
<h1>RegExp.prototype</h1>
3783237889
<p>The initial value of `RegExp.prototype` is the RegExp prototype object.</p>

0 commit comments

Comments
 (0)