@@ -77,7 +77,6 @@ pub(crate) enum Token {
7777 // In order for the parser to recover from tokenization errors, we
7878 // need to be able to emit an error token.
7979 Error ( String ) ,
80-
8180 LeftBrace ,
8281 RightBrace ,
8382 Newline ,
@@ -98,15 +97,10 @@ pub(crate) enum Token {
9897 /// can form part of a symex. See the TX-2 Users Handbook,
9998 /// section 6-3.2, "RULES FOR SYMEX FORMATION".
10099 Hold ,
101-
102- NotHold ,
103-
100+ NotHold , // handled specially, there is no glyph for this.
104101 Arrow ,
105-
106102 Hand ,
107-
108103 Hash ( Script ) ,
109-
110104 Equals ,
111105
112106 /// Asterisk is used quite heavily (indicating deferred addressing)
@@ -119,19 +113,12 @@ pub(crate) enum Token {
119113 Asterisk ,
120114
121115 Pipe ,
122-
123116 ProperSuperset ,
124-
125117 IdenticalTo ,
126-
127118 Tilde ,
128-
129119 LessThan ,
130-
131120 GreaterThan ,
132-
133121 Intersection ,
134-
135122 Union ,
136123
137124 /// Solidus is often called "slash" but people often confuse slash
@@ -140,13 +127,9 @@ pub(crate) enum Token {
140127
141128 // @plus@ is actually not the correct glyph name, following sub.py.
142129 Plus ( Script ) ,
143-
144130 Minus ( Script ) ,
145-
146131 Times ,
147-
148132 LogicalOr ( Script ) ,
149-
150133 LogicalAnd ( Script ) ,
151134
152135 // Any unary "-" is handled in the parser.
@@ -161,13 +144,7 @@ pub(crate) enum Token {
161144 /// differently in some circumstances (it is a macro terminator).
162145 /// However it is part of a valid symex also, and so we will need
163146 /// to parse it as such.
164- NormalSymexSyllable ( String ) ,
165-
166- // No support for superscript apostrophe, underscore.
167- SuperscriptSymexSyllable ( String ) ,
168-
169- // No support for superscript apostrophe, underscore.
170- SubscriptSymexSyllable ( String ) ,
147+ SymexSyllable ( Script , String ) ,
171148
172149 // If you change the representation of the dot in the token
173150 // definition, please also change DOT_CHAR.
@@ -181,7 +158,6 @@ pub(crate) enum Token {
181158 // this will help us to correctly process them when used as macro
182159 // terminators.
183160 Dot ( Script ) ,
184-
185161 Comma ,
186162}
187163
@@ -224,36 +200,33 @@ impl Display for Token {
224200 Token :: Digits ( script, numeric_literal) => {
225201 write ! ( f, "{}" , elevate( * script, numeric_literal. to_string( ) ) )
226202 }
227- Token :: NormalSymexSyllable ( s) => f. write_str ( s) ,
228- Token :: SuperscriptSymexSyllable ( s) => {
229- for ch in s. chars ( ) {
230- match superscript_char ( ch) {
231- Ok ( sup_ch) => f. write_char ( sup_ch) ,
232- Err ( _) => match ch {
233- 'α' => f. write_str ( "@sup_alpha@" ) ,
234- 'β' => f. write_str ( "@sup_beta@" ) ,
235- 'γ' => f. write_str ( "@sup_gamma@" ) ,
236- 'Δ' => f. write_str ( "@sup_delta@" ) ,
237- 'ε' => f. write_str ( "@sup_eps@" ) ,
238- 'λ' => f. write_str ( "@sup_lambda@" ) ,
239- _ => write ! ( f, "@sup_{ch}@" ) ,
240- } ,
241- } ?;
203+ Token :: SymexSyllable ( script, name) => {
204+ fn nochange ( ch : char ) -> Result < char , ( ) > {
205+ Ok ( ch)
242206 }
243- Ok ( ( ) )
244- }
245- Token :: SubscriptSymexSyllable ( s) => {
246- for ch in s. chars ( ) {
247- match subscript_char ( ch) {
207+ fn convert_to_sup ( ch : char ) -> Result < char , ( ) > {
208+ superscript_char ( ch) . map_err ( |_| ( ) )
209+ }
210+ fn convert_to_sub ( ch : char ) -> Result < char , ( ) > {
211+ subscript_char ( ch) . map_err ( |_| ( ) )
212+ }
213+ type Transformer = fn ( char ) -> Result < char , ( ) > ;
214+ let ( prefix, transform) : ( & ' static str , Transformer ) = match script {
215+ Script :: Super => ( "super_" , convert_to_sup) ,
216+ Script :: Normal => ( "" , nochange) ,
217+ Script :: Sub => ( "sub_" , convert_to_sub) ,
218+ } ;
219+ for ch in name. chars ( ) {
220+ match transform ( ch) {
248221 Ok ( sup_ch) => f. write_char ( sup_ch) ,
249- Err ( _ ) => match ch {
250- 'α' => f . write_str ( "@sub_alpha @") ,
251- 'β' => f . write_str ( "@sub_beta @") ,
252- 'γ' => f . write_str ( "@sub_gamma @") ,
253- 'Δ' => f . write_str ( "@sub_delta @") ,
254- 'ε' => f . write_str ( "@sub_eps @") ,
255- 'λ' => f . write_str ( "@sub_lambda @") ,
256- _ => write ! ( f, "@sub_ {ch}@" ) ,
222+ Err ( ( ) ) => match ch {
223+ 'α' => write ! ( f , "@{prefix}alpha @") ,
224+ 'β' => write ! ( f , "@{prefix}beta @") ,
225+ 'γ' => write ! ( f , "@{prefix}gamma @") ,
226+ 'Δ' => write ! ( f , "@{prefix}delta @") ,
227+ 'ε' => write ! ( f , "@{prefix}eps @") ,
228+ 'λ' => write ! ( f , "@{prefix}lambda @") ,
229+ _ => write ! ( f, "@{prefix} {ch}@" ) ,
257230 } ,
258231 } ?;
259232 }
@@ -434,11 +407,6 @@ mod lexer_impl_new {
434407 super :: Token :: Digits ( script, literal)
435408 } ;
436409 let make_symex = || -> Option < Token > {
437- let f = match script {
438- Script :: Super => Token :: SuperscriptSymexSyllable ,
439- Script :: Normal => Token :: NormalSymexSyllable ,
440- Script :: Sub => Token :: SubscriptSymexSyllable ,
441- } ;
442410 // The symex token always gives the characters in normal
443411 // script. The superscript/subscript information is
444412 // carried in the token variant
@@ -456,7 +424,7 @@ mod lexer_impl_new {
456424 panic ! ( "incoming token '{g:?}' was assigned as part of a symex syllable, but the resuting initial token body unexpectedly has more than one character (specifically, {n}): {name:?}" ) ;
457425 }
458426 }
459- Some ( f ( name) )
427+ Some ( Token :: SymexSyllable ( script , name) )
460428 } ;
461429 let only_normal = |t : Token | -> Option < Token > {
462430 match script {
@@ -613,94 +581,77 @@ mod lexer_impl_new {
613581 Token :: Minus ( Script :: Normal ) if incoming == Token :: GreaterThan => {
614582 TokenMergeResult :: Merged ( Token :: Arrow , merged_span)
615583 }
616- Token :: SuperscriptSymexSyllable ( mut existing) => match incoming {
617- Token :: SuperscriptSymexSyllable ( incoming) => {
618- existing. push_str ( & incoming) ;
619- TokenMergeResult :: Merged ( Token :: SuperscriptSymexSyllable ( existing) , merged_span)
620- }
621- Token :: Digits ( Script :: Super , literal) => {
622- existing. push_str ( & literal. digits ) ;
623- if literal. has_trailing_dot {
624- existing. push ( DOT_CHAR ) ;
625- }
626- TokenMergeResult :: Merged ( Token :: SuperscriptSymexSyllable ( existing) , merged_span)
627- }
628- other => TokenMergeResult :: Failed {
629- current : Ok ( Token :: SuperscriptSymexSyllable ( existing) ) ,
630- current_span,
631- incoming : Ok ( other) ,
632- incoming_span,
633- } ,
634- } ,
635- Token :: NormalSymexSyllable ( mut existing) => match incoming {
636- Token :: Hold => {
584+ Token :: SymexSyllable ( existing_script, mut existing_name) => match incoming {
585+ Token :: Hold if existing_script == Script :: Normal => {
637586 // overbar followed by h means not-hold, and we handle this case specially.
638- if existing == "\u{0305} " {
587+ if existing_name == "\u{0305} " {
639588 TokenMergeResult :: Merged ( Token :: NotHold , merged_span)
640589 } else {
641590 TokenMergeResult :: Failed {
642- current : Ok ( Token :: NormalSymexSyllable ( existing ) ) ,
591+ current : Ok ( Token :: SymexSyllable ( existing_script , existing_name ) ) ,
643592 current_span,
644593 incoming : Ok ( Token :: Hold ) ,
645594 incoming_span,
646595 }
647596 }
648597 }
649- Token :: NormalSymexSyllable ( incoming) => {
650- existing. push_str ( & incoming) ;
651- TokenMergeResult :: Merged ( Token :: NormalSymexSyllable ( existing) , merged_span)
598+ Token :: SymexSyllable ( incoming_script, incoming_name)
599+ if existing_script == incoming_script =>
600+ {
601+ existing_name. push_str ( & incoming_name) ;
602+ TokenMergeResult :: Merged (
603+ Token :: SymexSyllable ( existing_script, existing_name) ,
604+ merged_span,
605+ )
652606 }
653- Token :: Digits ( Script :: Normal , literal) => {
654- existing . push_str ( & literal. digits ) ;
607+ Token :: Digits ( incoming_script , literal) if existing_script == incoming_script => {
608+ existing_name . push_str ( & literal. digits ) ;
655609 if literal. has_trailing_dot {
656- existing . push ( DOT_CHAR ) ;
610+ existing_name . push ( DOT_CHAR ) ;
657611 }
658- TokenMergeResult :: Merged ( Token :: NormalSymexSyllable ( existing) , merged_span)
612+ TokenMergeResult :: Merged (
613+ Token :: SymexSyllable ( existing_script, existing_name) ,
614+ merged_span,
615+ )
659616 }
660617 other => TokenMergeResult :: Failed {
661- current : Ok ( Token :: NormalSymexSyllable ( existing ) ) ,
618+ current : Ok ( Token :: SymexSyllable ( existing_script , existing_name ) ) ,
662619 current_span,
663620 incoming : Ok ( other) ,
664621 incoming_span,
665622 } ,
666623 } ,
667- Token :: Digits ( left_script, mut existing) => match incoming {
668- Token :: Digits ( right_script, incoming) if left_script == right_script => {
669- existing. append_digits_of_literal ( incoming) ;
670- TokenMergeResult :: Merged ( Token :: Digits ( left_script, existing) , merged_span)
624+ Token :: Digits ( existing_script, mut existing_literal) => match incoming {
625+ Token :: Digits ( incoming_script, incoming_name)
626+ if existing_script == incoming_script =>
627+ {
628+ existing_literal. append_digits_of_literal ( incoming_name) ;
629+ TokenMergeResult :: Merged (
630+ Token :: Digits ( existing_script, existing_literal) ,
631+ merged_span,
632+ )
671633 }
672634 Token :: Dot ( right_script)
673- if left_script == right_script && !existing . has_trailing_dot =>
635+ if existing_script == right_script && !existing_literal . has_trailing_dot =>
674636 {
675- existing. has_trailing_dot = true ;
676- TokenMergeResult :: Merged ( Token :: Digits ( left_script, existing) , merged_span)
637+ existing_literal. has_trailing_dot = true ;
638+ TokenMergeResult :: Merged (
639+ Token :: Digits ( existing_script, existing_literal) ,
640+ merged_span,
641+ )
677642 }
678- Token :: NormalSymexSyllable ( sym) if left_script == Script :: Normal => {
679- let mut s: String = existing. digits ;
680- s. push_str ( & sym) ;
681- TokenMergeResult :: Merged ( Token :: NormalSymexSyllable ( s) , merged_span)
682- }
683- other => TokenMergeResult :: Failed {
684- current : Ok ( Token :: Digits ( left_script, existing) ) ,
685- current_span,
686- incoming : Ok ( other) ,
687- incoming_span,
688- } ,
689- } ,
690- Token :: SubscriptSymexSyllable ( mut existing) => match incoming {
691- Token :: SubscriptSymexSyllable ( incoming) => {
692- existing. push_str ( & incoming) ;
693- TokenMergeResult :: Merged ( Token :: SubscriptSymexSyllable ( existing) , merged_span)
694- }
695- Token :: Digits ( Script :: Sub , literal) => {
696- existing. push_str ( & literal. digits ) ;
697- if literal. has_trailing_dot {
698- existing. push ( DOT_CHAR ) ;
699- }
700- TokenMergeResult :: Merged ( Token :: SubscriptSymexSyllable ( existing) , merged_span)
643+ Token :: SymexSyllable ( incoming_script, sym)
644+ if existing_script == incoming_script =>
645+ {
646+ let mut existing_name: String = existing_literal. digits ;
647+ existing_name. push_str ( & sym) ;
648+ TokenMergeResult :: Merged (
649+ Token :: SymexSyllable ( existing_script, existing_name) ,
650+ merged_span,
651+ )
701652 }
702653 other => TokenMergeResult :: Failed {
703- current : Ok ( Token :: SubscriptSymexSyllable ( existing ) ) ,
654+ current : Ok ( Token :: Digits ( existing_script , existing_literal ) ) ,
704655 current_span,
705656 incoming : Ok ( other) ,
706657 incoming_span,
@@ -812,7 +763,10 @@ mod lexer_impl_new {
812763 assert_eq ! ( lex. get_next_spanned_token( ) , Some ( ( Ok ( Token :: Hold ) , 0 ..1 ) ) ) ;
813764 assert_eq ! (
814765 lex. get_next_spanned_token( ) ,
815- Some ( ( Ok ( Token :: NormalSymexSyllable ( "x" . to_string( ) ) ) , 1 ..2 ) )
766+ Some ( (
767+ Ok ( Token :: SymexSyllable ( Script :: Normal , "x" . to_string( ) ) ) ,
768+ 1 ..2
769+ ) )
816770 ) ;
817771 assert_eq ! ( lex. get_next_spanned_token( ) , None ) ;
818772 }
@@ -823,7 +777,10 @@ mod lexer_impl_new {
823777 let mut lex = GlyphTokenizer :: new ( "@sup_eps@" ) ;
824778 assert_eq ! (
825779 lex. get_next_spanned_token( ) ,
826- Some ( ( Ok ( Token :: SuperscriptSymexSyllable ( "ε" . to_string( ) ) ) , 0 ..9 ) )
780+ Some ( (
781+ Ok ( Token :: SymexSyllable ( Script :: Super , "ε" . to_string( ) ) ) ,
782+ 0 ..9
783+ ) )
827784 ) ;
828785 assert_eq ! ( lex. get_next_spanned_token( ) , None ) ;
829786 }
@@ -837,7 +794,10 @@ mod lexer_impl_new {
837794 let mut lex = GlyphTokenizer :: new ( input) ;
838795 assert_eq ! (
839796 lex. get_next_spanned_token( ) ,
840- Some ( ( Ok ( Token :: SuperscriptSymexSyllable ( "εW" . to_string( ) ) ) , 0 ..12 ) )
797+ Some ( (
798+ Ok ( Token :: SymexSyllable ( Script :: Super , "εW" . to_string( ) ) ) ,
799+ 0 ..12
800+ ) )
841801 ) ;
842802 assert_eq ! ( lex. get_next_spanned_token( ) , None ) ;
843803 }
@@ -851,11 +811,17 @@ mod lexer_impl_new {
851811 let mut lex = GlyphTokenizer :: new ( "@sup_eps@W" ) ;
852812 assert_eq ! (
853813 lex. get_next_spanned_token( ) ,
854- Some ( ( Ok ( Token :: SuperscriptSymexSyllable ( "ε" . to_string( ) ) ) , 0 ..9 ) )
814+ Some ( (
815+ Ok ( Token :: SymexSyllable ( Script :: Super , "ε" . to_string( ) ) ) ,
816+ 0 ..9
817+ ) )
855818 ) ;
856819 assert_eq ! (
857820 lex. get_next_spanned_token( ) ,
858- Some ( ( Ok ( Token :: NormalSymexSyllable ( "W" . to_string( ) ) ) , 9 ..10 ) )
821+ Some ( (
822+ Ok ( Token :: SymexSyllable ( Script :: Normal , "W" . to_string( ) ) ) ,
823+ 9 ..10
824+ ) )
859825 ) ;
860826 assert_eq ! ( lex. get_next_spanned_token( ) , None ) ;
861827 }
@@ -869,11 +835,17 @@ mod lexer_impl_new {
869835 let mut lex = GlyphTokenizer :: new ( "W Q" ) ;
870836 assert_eq ! (
871837 lex. get_next_spanned_token( ) ,
872- Some ( ( Ok ( Token :: NormalSymexSyllable ( "W" . to_string( ) ) ) , 0 ..1 ) )
838+ Some ( (
839+ Ok ( Token :: SymexSyllable ( Script :: Normal , "W" . to_string( ) ) ) ,
840+ 0 ..1
841+ ) )
873842 ) ;
874843 assert_eq ! (
875844 lex. get_next_spanned_token( ) ,
876- Some ( ( Ok ( Token :: NormalSymexSyllable ( "Q" . to_string( ) ) ) , 2 ..3 ) )
845+ Some ( (
846+ Ok ( Token :: SymexSyllable ( Script :: Normal , "Q" . to_string( ) ) ) ,
847+ 2 ..3
848+ ) )
877849 ) ;
878850 assert_eq ! ( lex. get_next_spanned_token( ) , None ) ;
879851 }
0 commit comments