Skip to content

Commit e9e9767

Browse files
committed
[assembler] Use just one token variant for a symex syllable.
1 parent e751e54 commit e9e9767

File tree

4 files changed

+142
-167
lines changed

4 files changed

+142
-167
lines changed

assembler/src/asmlib/lexer.rs

Lines changed: 101 additions & 129 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ pub(crate) enum Token {
7777
// In order for the parser to recover from tokenization errors, we
7878
// need to be able to emit an error token.
7979
Error(String),
80-
8180
LeftBrace,
8281
RightBrace,
8382
Newline,
@@ -98,15 +97,10 @@ pub(crate) enum Token {
9897
/// can form part of a symex. See the TX-2 Users Handbook,
9998
/// section 6-3.2, "RULES FOR SYMEX FORMATION".
10099
Hold,
101-
102-
NotHold,
103-
100+
NotHold, // handled specially, there is no glyph for this.
104101
Arrow,
105-
106102
Hand,
107-
108103
Hash(Script),
109-
110104
Equals,
111105

112106
/// Asterisk is used quite heavily (indicating deferred addressing)
@@ -119,19 +113,12 @@ pub(crate) enum Token {
119113
Asterisk,
120114

121115
Pipe,
122-
123116
ProperSuperset,
124-
125117
IdenticalTo,
126-
127118
Tilde,
128-
129119
LessThan,
130-
131120
GreaterThan,
132-
133121
Intersection,
134-
135122
Union,
136123

137124
/// Solidus is often called "slash" but people often confuse slash
@@ -140,13 +127,9 @@ pub(crate) enum Token {
140127

141128
// @plus@ is actually not the correct glyph name, following sub.py.
142129
Plus(Script),
143-
144130
Minus(Script),
145-
146131
Times,
147-
148132
LogicalOr(Script),
149-
150133
LogicalAnd(Script),
151134

152135
// Any unary "-" is handled in the parser.
@@ -161,13 +144,7 @@ pub(crate) enum Token {
161144
/// differently in some circumstances (it is a macro terminator).
162145
/// However it is part of a valid symex also, and so we will need
163146
/// to parse it as such.
164-
NormalSymexSyllable(String),
165-
166-
// No support for superscript apostrophe, underscore.
167-
SuperscriptSymexSyllable(String),
168-
169-
// No support for superscript apostrophe, underscore.
170-
SubscriptSymexSyllable(String),
147+
SymexSyllable(Script, String),
171148

172149
// If you change the representation of the dot in the token
173150
// definition, please also change DOT_CHAR.
@@ -181,7 +158,6 @@ pub(crate) enum Token {
181158
// this will help us to correctly process them when used as macro
182159
// terminators.
183160
Dot(Script),
184-
185161
Comma,
186162
}
187163

@@ -224,36 +200,33 @@ impl Display for Token {
224200
Token::Digits(script, numeric_literal) => {
225201
write!(f, "{}", elevate(*script, numeric_literal.to_string()))
226202
}
227-
Token::NormalSymexSyllable(s) => f.write_str(s),
228-
Token::SuperscriptSymexSyllable(s) => {
229-
for ch in s.chars() {
230-
match superscript_char(ch) {
231-
Ok(sup_ch) => f.write_char(sup_ch),
232-
Err(_) => match ch {
233-
'α' => f.write_str("@sup_alpha@"),
234-
'β' => f.write_str("@sup_beta@"),
235-
'γ' => f.write_str("@sup_gamma@"),
236-
'Δ' => f.write_str("@sup_delta@"),
237-
'ε' => f.write_str("@sup_eps@"),
238-
'λ' => f.write_str("@sup_lambda@"),
239-
_ => write!(f, "@sup_{ch}@"),
240-
},
241-
}?;
203+
Token::SymexSyllable(script, name) => {
204+
fn nochange(ch: char) -> Result<char, ()> {
205+
Ok(ch)
242206
}
243-
Ok(())
244-
}
245-
Token::SubscriptSymexSyllable(s) => {
246-
for ch in s.chars() {
247-
match subscript_char(ch) {
207+
fn convert_to_sup(ch: char) -> Result<char, ()> {
208+
superscript_char(ch).map_err(|_| ())
209+
}
210+
fn convert_to_sub(ch: char) -> Result<char, ()> {
211+
subscript_char(ch).map_err(|_| ())
212+
}
213+
type Transformer = fn(char) -> Result<char, ()>;
214+
let (prefix, transform): (&'static str, Transformer) = match script {
215+
Script::Super => ("super_", convert_to_sup),
216+
Script::Normal => ("", nochange),
217+
Script::Sub => ("sub_", convert_to_sub),
218+
};
219+
for ch in name.chars() {
220+
match transform(ch) {
248221
Ok(sup_ch) => f.write_char(sup_ch),
249-
Err(_) => match ch {
250-
'α' => f.write_str("@sub_alpha@"),
251-
'β' => f.write_str("@sub_beta@"),
252-
'γ' => f.write_str("@sub_gamma@"),
253-
'Δ' => f.write_str("@sub_delta@"),
254-
'ε' => f.write_str("@sub_eps@"),
255-
'λ' => f.write_str("@sub_lambda@"),
256-
_ => write!(f, "@sub_{ch}@"),
222+
Err(()) => match ch {
223+
'α' => write!(f, "@{prefix}alpha@"),
224+
'β' => write!(f, "@{prefix}beta@"),
225+
'γ' => write!(f, "@{prefix}gamma@"),
226+
'Δ' => write!(f, "@{prefix}delta@"),
227+
'ε' => write!(f, "@{prefix}eps@"),
228+
'λ' => write!(f, "@{prefix}lambda@"),
229+
_ => write!(f, "@{prefix}{ch}@"),
257230
},
258231
}?;
259232
}
@@ -434,11 +407,6 @@ mod lexer_impl_new {
434407
super::Token::Digits(script, literal)
435408
};
436409
let make_symex = || -> Option<Token> {
437-
let f = match script {
438-
Script::Super => Token::SuperscriptSymexSyllable,
439-
Script::Normal => Token::NormalSymexSyllable,
440-
Script::Sub => Token::SubscriptSymexSyllable,
441-
};
442410
// The symex token always gives the characters in normal
443411
// script. The superscript/subscript information is
444412
// carried in the token's `Script` field
@@ -456,7 +424,7 @@ mod lexer_impl_new {
456424
panic!("incoming token '{g:?}' was assigned as part of a symex syllable, but the resuting initial token body unexpectedly has more than one character (specifically, {n}): {name:?}");
457425
}
458426
}
459-
Some(f(name))
427+
Some(Token::SymexSyllable(script, name))
460428
};
461429
let only_normal = |t: Token| -> Option<Token> {
462430
match script {
@@ -613,94 +581,77 @@ mod lexer_impl_new {
613581
Token::Minus(Script::Normal) if incoming == Token::GreaterThan => {
614582
TokenMergeResult::Merged(Token::Arrow, merged_span)
615583
}
616-
Token::SuperscriptSymexSyllable(mut existing) => match incoming {
617-
Token::SuperscriptSymexSyllable(incoming) => {
618-
existing.push_str(&incoming);
619-
TokenMergeResult::Merged(Token::SuperscriptSymexSyllable(existing), merged_span)
620-
}
621-
Token::Digits(Script::Super, literal) => {
622-
existing.push_str(&literal.digits);
623-
if literal.has_trailing_dot {
624-
existing.push(DOT_CHAR);
625-
}
626-
TokenMergeResult::Merged(Token::SuperscriptSymexSyllable(existing), merged_span)
627-
}
628-
other => TokenMergeResult::Failed {
629-
current: Ok(Token::SuperscriptSymexSyllable(existing)),
630-
current_span,
631-
incoming: Ok(other),
632-
incoming_span,
633-
},
634-
},
635-
Token::NormalSymexSyllable(mut existing) => match incoming {
636-
Token::Hold => {
584+
Token::SymexSyllable(existing_script, mut existing_name) => match incoming {
585+
Token::Hold if existing_script == Script::Normal => {
637586
// overbar followed by h means not-hold, and we handle this case specially.
638-
if existing == "\u{0305}" {
587+
if existing_name == "\u{0305}" {
639588
TokenMergeResult::Merged(Token::NotHold, merged_span)
640589
} else {
641590
TokenMergeResult::Failed {
642-
current: Ok(Token::NormalSymexSyllable(existing)),
591+
current: Ok(Token::SymexSyllable(existing_script, existing_name)),
643592
current_span,
644593
incoming: Ok(Token::Hold),
645594
incoming_span,
646595
}
647596
}
648597
}
649-
Token::NormalSymexSyllable(incoming) => {
650-
existing.push_str(&incoming);
651-
TokenMergeResult::Merged(Token::NormalSymexSyllable(existing), merged_span)
598+
Token::SymexSyllable(incoming_script, incoming_name)
599+
if existing_script == incoming_script =>
600+
{
601+
existing_name.push_str(&incoming_name);
602+
TokenMergeResult::Merged(
603+
Token::SymexSyllable(existing_script, existing_name),
604+
merged_span,
605+
)
652606
}
653-
Token::Digits(Script::Normal, literal) => {
654-
existing.push_str(&literal.digits);
607+
Token::Digits(incoming_script, literal) if existing_script == incoming_script => {
608+
existing_name.push_str(&literal.digits);
655609
if literal.has_trailing_dot {
656-
existing.push(DOT_CHAR);
610+
existing_name.push(DOT_CHAR);
657611
}
658-
TokenMergeResult::Merged(Token::NormalSymexSyllable(existing), merged_span)
612+
TokenMergeResult::Merged(
613+
Token::SymexSyllable(existing_script, existing_name),
614+
merged_span,
615+
)
659616
}
660617
other => TokenMergeResult::Failed {
661-
current: Ok(Token::NormalSymexSyllable(existing)),
618+
current: Ok(Token::SymexSyllable(existing_script, existing_name)),
662619
current_span,
663620
incoming: Ok(other),
664621
incoming_span,
665622
},
666623
},
667-
Token::Digits(left_script, mut existing) => match incoming {
668-
Token::Digits(right_script, incoming) if left_script == right_script => {
669-
existing.append_digits_of_literal(incoming);
670-
TokenMergeResult::Merged(Token::Digits(left_script, existing), merged_span)
624+
Token::Digits(existing_script, mut existing_literal) => match incoming {
625+
Token::Digits(incoming_script, incoming_name)
626+
if existing_script == incoming_script =>
627+
{
628+
existing_literal.append_digits_of_literal(incoming_name);
629+
TokenMergeResult::Merged(
630+
Token::Digits(existing_script, existing_literal),
631+
merged_span,
632+
)
671633
}
672634
Token::Dot(right_script)
673-
if left_script == right_script && !existing.has_trailing_dot =>
635+
if existing_script == right_script && !existing_literal.has_trailing_dot =>
674636
{
675-
existing.has_trailing_dot = true;
676-
TokenMergeResult::Merged(Token::Digits(left_script, existing), merged_span)
637+
existing_literal.has_trailing_dot = true;
638+
TokenMergeResult::Merged(
639+
Token::Digits(existing_script, existing_literal),
640+
merged_span,
641+
)
677642
}
678-
Token::NormalSymexSyllable(sym) if left_script == Script::Normal => {
679-
let mut s: String = existing.digits;
680-
s.push_str(&sym);
681-
TokenMergeResult::Merged(Token::NormalSymexSyllable(s), merged_span)
682-
}
683-
other => TokenMergeResult::Failed {
684-
current: Ok(Token::Digits(left_script, existing)),
685-
current_span,
686-
incoming: Ok(other),
687-
incoming_span,
688-
},
689-
},
690-
Token::SubscriptSymexSyllable(mut existing) => match incoming {
691-
Token::SubscriptSymexSyllable(incoming) => {
692-
existing.push_str(&incoming);
693-
TokenMergeResult::Merged(Token::SubscriptSymexSyllable(existing), merged_span)
694-
}
695-
Token::Digits(Script::Sub, literal) => {
696-
existing.push_str(&literal.digits);
697-
if literal.has_trailing_dot {
698-
existing.push(DOT_CHAR);
699-
}
700-
TokenMergeResult::Merged(Token::SubscriptSymexSyllable(existing), merged_span)
643+
Token::SymexSyllable(incoming_script, sym)
644+
if existing_script == incoming_script =>
645+
{
646+
let mut existing_name: String = existing_literal.digits;
647+
existing_name.push_str(&sym);
648+
TokenMergeResult::Merged(
649+
Token::SymexSyllable(existing_script, existing_name),
650+
merged_span,
651+
)
701652
}
702653
other => TokenMergeResult::Failed {
703-
current: Ok(Token::SubscriptSymexSyllable(existing)),
654+
current: Ok(Token::Digits(existing_script, existing_literal)),
704655
current_span,
705656
incoming: Ok(other),
706657
incoming_span,
@@ -812,7 +763,10 @@ mod lexer_impl_new {
812763
assert_eq!(lex.get_next_spanned_token(), Some((Ok(Token::Hold), 0..1)));
813764
assert_eq!(
814765
lex.get_next_spanned_token(),
815-
Some((Ok(Token::NormalSymexSyllable("x".to_string())), 1..2))
766+
Some((
767+
Ok(Token::SymexSyllable(Script::Normal, "x".to_string())),
768+
1..2
769+
))
816770
);
817771
assert_eq!(lex.get_next_spanned_token(), None);
818772
}
@@ -823,7 +777,10 @@ mod lexer_impl_new {
823777
let mut lex = GlyphTokenizer::new("@sup_eps@");
824778
assert_eq!(
825779
lex.get_next_spanned_token(),
826-
Some((Ok(Token::SuperscriptSymexSyllable("ε".to_string())), 0..9))
780+
Some((
781+
Ok(Token::SymexSyllable(Script::Super, "ε".to_string())),
782+
0..9
783+
))
827784
);
828785
assert_eq!(lex.get_next_spanned_token(), None);
829786
}
@@ -837,7 +794,10 @@ mod lexer_impl_new {
837794
let mut lex = GlyphTokenizer::new(input);
838795
assert_eq!(
839796
lex.get_next_spanned_token(),
840-
Some((Ok(Token::SuperscriptSymexSyllable("εW".to_string())), 0..12))
797+
Some((
798+
Ok(Token::SymexSyllable(Script::Super, "εW".to_string())),
799+
0..12
800+
))
841801
);
842802
assert_eq!(lex.get_next_spanned_token(), None);
843803
}
@@ -851,11 +811,17 @@ mod lexer_impl_new {
851811
let mut lex = GlyphTokenizer::new("@sup_eps@W");
852812
assert_eq!(
853813
lex.get_next_spanned_token(),
854-
Some((Ok(Token::SuperscriptSymexSyllable("ε".to_string())), 0..9))
814+
Some((
815+
Ok(Token::SymexSyllable(Script::Super, "ε".to_string())),
816+
0..9
817+
))
855818
);
856819
assert_eq!(
857820
lex.get_next_spanned_token(),
858-
Some((Ok(Token::NormalSymexSyllable("W".to_string())), 9..10))
821+
Some((
822+
Ok(Token::SymexSyllable(Script::Normal, "W".to_string())),
823+
9..10
824+
))
859825
);
860826
assert_eq!(lex.get_next_spanned_token(), None);
861827
}
@@ -869,11 +835,17 @@ mod lexer_impl_new {
869835
let mut lex = GlyphTokenizer::new("W Q");
870836
assert_eq!(
871837
lex.get_next_spanned_token(),
872-
Some((Ok(Token::NormalSymexSyllable("W".to_string())), 0..1))
838+
Some((
839+
Ok(Token::SymexSyllable(Script::Normal, "W".to_string())),
840+
0..1
841+
))
873842
);
874843
assert_eq!(
875844
lex.get_next_spanned_token(),
876-
Some((Ok(Token::NormalSymexSyllable("Q".to_string())), 2..3))
845+
Some((
846+
Ok(Token::SymexSyllable(Script::Normal, "Q".to_string())),
847+
2..3
848+
))
877849
);
878850
assert_eq!(lex.get_next_spanned_token(), None);
879851
}

0 commit comments

Comments
 (0)