@@ -26,110 +26,102 @@ THE SOFTWARE.
2626
2727---------------------------------------------------------------------------*/
2828
/**
 * Returns true if the given code point may sit next to a KATAKANA MIDDLE DOT
 * (U+30FB) inside a label. IsIdnHostname calls this with the code points
 * immediately before and after each U+30FB it encounters.
 *
 * @param cp A Unicode code point (as returned by `String.prototype.codePointAt`).
 * @returns True when `cp` falls in one of the accepted ranges below.
 */
function IsValidAdjacentForKatakanaMiddleDot(cp: number): boolean {
  return (
    (cp >= 0x3040 && cp <= 0x309F) || // Hiragana
    (cp >= 0x30A0 && cp <= 0x30FF && cp !== 0x30FB) || // Katakana (excluding U+30FB itself)
    (cp >= 0x4E00 && cp <= 0x9FFF) // Han (CJK Unified Ideographs)
  )
}
3936/**
40- * Returns true if the value is a Hostname
41- * @specification
37+ * Returns true if the value is an IDN Hostname
38+ * @specification Json Schema 2020-12
4239 */
4340export function IsIdnHostname ( value : string ) : boolean {
44- if ( value . length === 0 ) return false
45- if ( value . includes ( ' ' ) ) return false
46- // Allowed label separators per RFC3490: U+002E, U+3002, U+FF0E, U+FF61.
47- const separators = / [ \u002E \u3002 \uFF0E \uFF61 ] / g
48- // Normalize (NFC) and replace allowed separators with a dot.
49- const normalized = value . normalize ( 'NFC' ) . replace ( separators , '.' )
41+ if ( value . length === 0 || value . includes ( ' ' ) ) return false
42+ // Normalize (NFC) and replace allowed separators with a dot
43+ // Allowed label separators per RFC3490: U+002E, U+3002, U+FF0E, U+FF61
44+ const normalized = value . normalize ( 'NFC' ) . replace ( / [ \u002E \u3002 \uFF0E \uFF61 ] / g, '.' )
5045 if ( normalized . length > 253 ) return false
51- // Split into labels; disallow empty labels.
5246 const labels = normalized . split ( '.' )
53- if ( labels . some ( ( label ) => label . length === 0 ) ) return false
54-
5547 for ( const label of labels ) {
56- // Each label must be ≤ 63 characters.
57- if ( label . length > 63 ) return false
58- // Labels must not begin or end with a hyphen.
59- if ( label . startsWith ( '-' ) || label . endsWith ( '-' ) ) return false
60-
61- // A-label (punycode) checks.
62- if ( / ^ x n - - / i. test ( label ) ) {
48+ if ( label . length === 0 || label . length > 63 ) return false
49+ // Labels must not begin or end with a hyphen
50+ if ( label . charCodeAt ( 0 ) === 45 || label . charCodeAt ( label . length - 1 ) === 45 ) return false
51+ // A-label (punycode) checks
52+ if (
53+ ( label . charCodeAt ( 0 ) === 120 || label . charCodeAt ( 0 ) === 88 ) && // 'x' or 'X'
54+ ( label . charCodeAt ( 1 ) === 110 || label . charCodeAt ( 1 ) === 78 ) && // 'n' or 'N'
55+ label . charCodeAt ( 2 ) === 45 && // '-'
56+ label . charCodeAt ( 3 ) === 45 // '-'
57+ ) {
6358 const punycodePart = label . slice ( 4 )
64- if ( punycodePart . length < 2 ) return false
65- if ( punycodePart . includes ( '---' ) ) return false
59+ if ( punycodePart . length < 2 || punycodePart . includes ( '---' ) ) return false
6660 continue
6761 }
68- // U-label: Reject if any disallowed code points occur.
69- // Disallowed: U+302E, U+302F, U+3031, U+3032, U+3033, U+3034, U+3035, U+303B, U+0640, U+07FA.
70- if ( / [ \u302E \u302F \u3031 \u3032 \u3033 \u3034 \u3035 \u303B \u0640 \u07FA ] / . test ( label ) ) {
71- return false
72- }
73- // Disallow labels starting with certain combining marks.
74- const firstChar = label . charAt ( 0 )
75- if ( / [ \u0903 \u0300 \u0488 ] / . test ( firstChar ) ) return false
76-
77- // Check each character within the label.
62+ // U-label checks
63+ let hasArabicIndic = false
64+ let hasExtendedArabicIndic = false
7865 for ( let i = 0 ; i < label . length ; i ++ ) {
79- const char = label . charAt ( i )
80- // --- MIDDLE DOT (U+00B7) ---
81- // Must be flanked on both sides by "l" or "L".
82- if ( char === '\u00B7' ) {
66+ // deno-coverage-ignore
67+ const cp = label . codePointAt ( i ) ?? 0
68+ // Disallowed code points
69+ if (
70+ cp === 0x302E || cp === 0x302F ||
71+ cp === 0x3031 || cp === 0x3032 || cp === 0x3033 || cp === 0x3034 || cp === 0x3035 ||
72+ cp === 0x303B || cp === 0x0640 || cp === 0x07FA
73+ ) return false
74+ // Disallow labels starting with certain combining marks
75+ if ( i === 0 && ( cp === 0x0903 || cp === 0x0300 || cp === 0x0488 ) ) return false
76+ // MIDDLE DOT (U+00B7) must be flanked by 'l' or 'L'
77+ if ( cp === 0x00B7 ) {
8378 if ( i === 0 || i === label . length - 1 ) return false
84- const prev = label . charAt ( i - 1 )
85- const next = label . charAt ( i + 1 )
86- if ( ! / ^ [ l L ] $ / . test ( prev ) || ! / ^ [ l L ] $ / . test ( next ) ) return false
79+ // deno-coverage-ignore
80+ const prev = label . codePointAt ( i - 1 ) ?? 0
81+ // deno-coverage-ignore
82+ const next = label . codePointAt ( i + 1 ) ?? 0
83+ if ( ( prev !== 108 && prev !== 76 ) || ( next !== 108 && next !== 76 ) ) return false
8784 }
88- // --- KATAKANA MIDDLE DOT (U+30FB) ---
89- if ( char === '\u30FB' ) {
90- // If label is a single character, it's invalid.
85+ // KATAKANA MIDDLE DOT (U+30FB) | U+30FB is below U+FFFF so stride is always 1
86+ if ( cp === 0x30FB ) {
9187 if ( label . length === 1 ) return false
9288 if ( i === 0 ) {
93- // At beginning: check following character.
94- const next = label . charAt ( i + 1 )
89+ // deno-coverage-ignore
90+ const next = label . codePointAt ( i + 1 ) ?? 0
9591 if ( ! IsValidAdjacentForKatakanaMiddleDot ( next ) ) return false
9692 } else {
97- // In the middle: check both adjacent characters.
98- const prev = label . charAt ( i - 1 )
99- const next = label . charAt ( i + 1 )
100- if ( ! IsValidAdjacentForKatakanaMiddleDot ( prev ) || ! IsValidAdjacentForKatakanaMiddleDot ( next ) ) {
101- return false
102- }
93+ // deno-coverage-ignore
94+ const prev = label . codePointAt ( i - 1 ) ?? 0
95+ // deno-coverage-ignore
96+ const next = label . codePointAt ( i + 1 ) ?? 0
97+ if ( ! IsValidAdjacentForKatakanaMiddleDot ( prev ) || ! IsValidAdjacentForKatakanaMiddleDot ( next ) ) return false
10398 }
10499 }
105- // --- Greek Keraia (U+0375) ---
106- if ( char === '\u0375' ) {
100+ // Greek KERAIA (U+0375) | U+0375 is below U+FFFF so stride is always 1
101+ if ( cp === 0x0375 ) {
107102 if ( i === label . length - 1 ) return false
108- const next = label . charAt ( i + 1 )
109- if ( ! / [ \u0370 - \u03FF ] / . test ( next ) ) return false
103+ // deno-coverage-ignore
104+ const next = label . codePointAt ( i + 1 ) ?? 0
105+ if ( next < 0x0370 || next > 0x03FF ) return false
110106 }
111-
112- // --- Hebrew GERESH (U+05F3) and GERSHAYIM (U+05F4) ---
113- if ( char === '\u05F3' || char === '\u05F4' ) {
107+ // Hebrew GERESH (U+05F3) and GERSHAYIM (U+05F4)
108+ if ( cp === 0x05F3 || cp === 0x05F4 ) {
114109 if ( i === 0 ) return false
115- const prev = label . charAt ( i - 1 )
116- if ( ! / [ \u05D0 - \u05EA ] / . test ( prev ) ) return false
110+ // deno-coverage-ignore
111+ const prev = label . codePointAt ( i - 1 ) ?? 0
112+ if ( prev < 0x05D0 || prev > 0x05EA ) return false
117113 }
118- // --- ZERO WIDTH JOINER (U+200D) ---
119- if ( char === '\u200D' ) {
114+ // ZERO WIDTH JOINER (U+200D)
115+ if ( cp === 0x200D ) {
120116 if ( i === 0 ) return false
121- const prev = label . charAt ( i - 1 )
122- if ( prev !== '\u094D' ) return false
117+ // deno-coverage-ignore
118+ const prev = label . codePointAt ( i - 1 ) ?? 0
119+ if ( prev !== 0x094D ) return false
123120 }
124- // ZERO WIDTH NON-JOINER (U+200C) is allowed.
125- }
126- // --- Arabic-Indic digits vs. Extended Arabic-Indic digits ---
127- let hasArabicIndic = false
128- let hasExtendedArabicIndic = false
129- for ( let i = 0 ; i < label . length ; i ++ ) {
130- const char = label . charAt ( i )
131- if ( / [ \u0660 - \u0669 ] / . test ( char ) ) hasArabicIndic = true
132- if ( / [ \u06F0 - \u06F9 ] / . test ( char ) ) hasExtendedArabicIndic = true
121+ // Arabic-Indic digits
122+ if ( cp >= 0x0660 && cp <= 0x0669 ) hasArabicIndic = true
123+ // Extended Arabic-Indic digits
124+ if ( cp >= 0x06F0 && cp <= 0x06F9 ) hasExtendedArabicIndic = true
133125 }
134126 if ( hasArabicIndic && hasExtendedArabicIndic ) return false
135127 }
0 commit comments