@@ -335,49 +335,54 @@ def __init__(self):
335
335
self .from_bcp_47_uninherited = None
336
336
# Whether the parser is in a <td> element
337
337
self ._td = False
338
- # Whether the parser is after a <br> element within the current <tr> element
339
- self ._br = False
338
+ # Whether the parser ignores the rest of the current <td> element
339
+ self ._disengaged = False
340
340
# The text of the <td> elements of the current <tr> element.
341
341
self ._current_tr = []
342
342
343
- def handle_starttag (self , tag , attrs ):
344
- if tag == 'br' :
345
- self ._br = True
343
+ def handle_starttag (self , tag , attrs ):
344
+ if tag == 'a' :
345
+ if self ._current_tr and not self ._disengaged :
346
+ self ._current_tr [- 1 ] = ''
347
+ self ._disengaged = True
348
+ elif tag == 'br' :
349
+ self ._disengaged = True
346
350
elif tag == 'meta' :
347
351
for attr , value in attrs :
348
352
if attr == 'name' and value == 'updated_at' :
349
- self .header = self .get_starttag_text ()
353
+ self .header = self .get_starttag_text ()
350
354
break
351
355
elif tag == 'td' :
352
356
self ._td = True
353
- self ._current_tr .append ('' )
357
+ self ._current_tr .append ('' )
354
358
elif tag == 'tr' :
355
- self ._br = False
359
+ self ._disengaged = False
356
360
self ._current_tr = []
357
361
358
- def handle_endtag (self , tag ):
362
+ def handle_endtag (self , tag ):
359
363
if tag == 'td' :
360
364
self ._td = False
365
+ self ._disengaged = False
361
366
elif tag == 'tr' and self ._current_tr :
362
- expect (2 <= len (self ._current_tr ) <= 3 )
363
- name = self ._current_tr [0 ].strip ()
364
- tag = self ._current_tr [1 ].strip ("\t \n \v \f \r '" )
367
+ expect (2 <= len (self ._current_tr ) <= 3 )
368
+ name = self ._current_tr [0 ].strip ()
369
+ tag = self ._current_tr [1 ].strip ("\t \n \v \f \r '" )
365
370
rank = 0
366
- if len (tag ) > 4 :
367
- expect (tag .endswith ( ' (deprecated)' ), 'ill-formed OpenType tag: %s' % tag )
368
- name += '(deprecated)'
369
- tag = tag .split (' ' )[0 ]
371
+ if len (tag ) > 4 :
372
+ expect (tag .endswith ( ' (deprecated)' ), 'ill-formed OpenType tag: %s' % tag )
373
+ name += ' (deprecated)'
374
+ tag = tag .split (' ' )[0 ]
370
375
rank = 1
371
- self .names [tag ] = re .sub (' languages$' , '' , name )
376
+ self .names [tag ] = re .sub (' languages$' , '' , name )
372
377
if not self ._current_tr [2 ]:
373
378
return
374
- iso_codes = self ._current_tr [2 ].strip ()
375
- self .to_bcp_47 [tag ].update (ISO_639_3_TO_1 .get (code , code ) for code in iso_codes .replace (' ' , '' ).split (',' ))
376
- rank += 2 * len (self .to_bcp_47 [tag ])
379
+ iso_codes = self ._current_tr [2 ].strip ()
380
+ self .to_bcp_47 [tag ].update (ISO_639_3_TO_1 .get (code , code ) for code in iso_codes .replace (' ' , '' ).split (',' ))
381
+ rank += 2 * len (self .to_bcp_47 [tag ])
377
382
self .ranks [tag ] = rank
378
383
379
- def handle_data (self , data ):
380
- if self ._td and not self ._br :
384
+ def handle_data (self , data ):
385
+ if self ._td and not self ._disengaged :
381
386
self ._current_tr [- 1 ] += data
382
387
383
388
def handle_charref (self , name ):
@@ -980,7 +985,7 @@ def same_tag(bcp_47_tag, ot_tags):
980
985
print ('fn strncmp(s1: &str, s2: &str, n: usize) -> bool {' )
981
986
print (' let n1 = core::cmp::min(n, s1.len());' )
982
987
print (' let n2 = core::cmp::min(n, s2.len());' )
983
- print (' & s1[..n1] == & s2[..n2]' )
988
+ print (' s1[..n1] == s2[..n2]' )
984
989
print ('}' )
985
990
print ()
986
991
print ('/// Converts a multi-subtag BCP 47 language tag to language tags.' )
0 commit comments