@@ -2,6 +2,8 @@ import fs from "fs"
2
2
import type { DOMWindow } from "jsdom"
3
3
import { JSDOM } from "jsdom"
4
4
import { ESLint } from "eslint"
5
+ import { getLatestUnicodeGeneralCategoryValues } from "./get-latest-unicode-general-category-values"
6
+ import { getLatestUnicodeScriptValues } from "./get-latest-unicode-script-values"
5
7
6
8
const DATA_SOURCES = [
7
9
{
@@ -43,8 +45,8 @@ const DATA_SOURCES = [
43
45
url : "https://tc39.es/ecma262/multipage/text-processing.html" ,
44
46
version : 2023 ,
45
47
binProperties : "#table-binary-unicode-properties" ,
46
- gcValues : "#table-unicode-general-category-values" ,
47
- scValues : "#table-unicode-script-values" ,
48
+ gcValues : getLatestUnicodeGeneralCategoryValues ,
49
+ scValues : getLatestUnicodeScriptValues ,
48
50
} ,
49
51
]
50
52
const FILE_PATH = "src/unicode/properties.ts"
@@ -96,13 +98,21 @@ type Datum = {
96
98
} while ( window == null )
97
99
98
100
logger . log ( "Parsing tables" )
99
- datum . binProperties = collectValues (
101
+ datum . binProperties = await collectValues (
100
102
window ,
101
103
binProperties ,
102
104
existing . binProperties ,
103
105
)
104
- datum . gcValues = collectValues ( window , gcValues , existing . gcValues )
105
- datum . scValues = collectValues ( window , scValues , existing . scValues )
106
+ datum . gcValues = await collectValues (
107
+ window ,
108
+ gcValues ,
109
+ existing . gcValues ,
110
+ )
111
+ datum . scValues = await collectValues (
112
+ window ,
113
+ scValues ,
114
+ existing . scValues ,
115
+ )
106
116
107
117
logger . log ( "Done" )
108
118
}
@@ -169,32 +179,55 @@ export function isValidLoneUnicodeProperty(version: number, value: string): bool
169
179
process . exitCode = 1
170
180
} )
171
181
172
/**
 * Collect the values that are new compared to `existingSet`.
 *
 * Values come either from the first-column `<code>` cells under the given
 * selector prefix in `window.document`, or from a provider function that
 * returns an async iterable of values.
 *
 * Side effect: every newly adopted value is added to `existingSet`.
 *
 * @param window The DOM window whose document is queried when a selector
 * prefix is given. It is not touched when a provider is given.
 * @param idSelectorOrProvider A selector prefix (combined with
 * ` td:nth-child(1) code`), or a function that returns the values.
 * @param existingSet The values that are already known. Mutated in place.
 * @returns The newly adopted values, sorted in ascending order by `<`/`>`
 * string comparison.
 * @throws {Error} When the selector matches no nodes, or when a value that
 * was in `existingSet` at call time is not produced by the source.
 */
async function collectValues(
    window: DOMWindow,
    idSelectorOrProvider: string | (() => AsyncIterable<string>),
    existingSet: Set<string>,
): Promise<string[]> {
    const getValues =
        typeof idSelectorOrProvider === "function"
            ? idSelectorOrProvider
            : function* valuesFromTable(): Iterable<string> {
                  const selector = `${idSelectorOrProvider} td:nth-child(1) code`
                  const nodes = window.document.querySelectorAll(selector)
                  if (nodes.length === 0) {
                      // An empty result is treated as an error rather than
                      // as "no new values".
                      throw new Error(`No nodes found for selector ${selector}`)
                  }
                  logger.log(
                      "%o nodes of %o were found.",
                      nodes.length,
                      selector,
                  )
                  for (const node of Array.from(nodes)) {
                      yield node.textContent ?? ""
                  }
              }

    // Snapshot of the known values; whatever remains after the loop was not
    // produced by the source.
    const missing = new Set(existingSet)
    const values = new Set<string>()
    let allCount = 0

    // `for await` accepts both the sync generator and an async provider.
    for await (const value of getValues()) {
        allCount++
        missing.delete(value)
        if (existingSet.has(value)) {
            continue
        }
        existingSet.add(value)
        values.add(value)
    }

    if (missing.size > 0) {
        throw new Error(`Missing values: ${Array.from(missing).join(", ")}`)
    }

    logger.log(
        "%o adopted and %o ignored as duplication.",
        values.size,
        allCount - values.size,
    )

    return [...values].sort((a, b) => (a > b ? 1 : a < b ? -1 : 0))
}
199
232
200
233
function makeClassDeclarationCode ( versions : string [ ] ) : string {
0 commit comments