Having a look at integrating rowan/logos/lalrpop #5

Open · wants to merge 7 commits into master
1 change: 1 addition & 0 deletions Cargo.toml
@@ -14,6 +14,7 @@ regex = "1"
logos-derive = "0.9.7"
codespan-reporting = "0.9"
structopt = "0.3.12"
rowan = "0.9"

[features]
# So this flag allows you to add the #[ignore] attribute at compile time to known failing tests.
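The new rowan dependency supplies the lossless "green tree" layer the PR title mentions. A minimal sketch of how such a tree is built, assuming the ~0.9-era API (SyntaxKind as a plain u16 newtype, token text convertible from &str) — names here are assumptions, not code from this PR:

// Sketch only: rowan ~0.9 API; kinds and the helper are hypothetical.
use rowan::{GreenNodeBuilder, SyntaxKind};

fn example_tree() -> rowan::GreenNode {
    let mut b = GreenNodeBuilder::new();
    b.start_node(SyntaxKind(0));        // hypothetical Root kind
    b.token(SyntaxKind(1), "x".into()); // hypothetical Name leaf
    b.finish_node();
    b.finish()
}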
3 changes: 2 additions & 1 deletion src/error.rs
@@ -1,5 +1,6 @@
use crate::lex;
pub type Error<'a> = lalrpop_util::ParseError<usize, lex::Token<'a>, lex::LexicalError>;
use crate::token_wrap;
pub type Error<'a> = lalrpop_util::ParseError<usize, token_wrap::Token<'a>, lex::LexicalError>;

#[derive(Debug)]
pub enum MainError {
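The widened alias still resolves to lalrpop_util::ParseError, so error handling stays variant-based. A hedged sketch of consuming it — the helper is hypothetical, and field sets on some variants vary across lalrpop versions, hence the `..` and catch-all:

// Hypothetical helper, not part of this diff.
fn describe(err: &crate::error::Error<'_>) -> String {
    use lalrpop_util::ParseError::*;
    match err {
        InvalidToken { location } => format!("invalid token at byte {}", location),
        UnrecognizedToken { .. } => "unrecognized token".to_string(),
        User { error } => format!("{}", error),
        _ => "parse error".to_string(), // ExtraToken and friends
    }
}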
137 changes: 35 additions & 102 deletions src/lex.rs
@@ -1,29 +1,26 @@
use logos::Logos;
use std::ops::Range;

#[derive(Debug, Clone)]
pub enum Token<'a> {
Dot,
Semi,
Colon,
LParen,
RParen,
Bot,
Top,
Disj,
Conj,
Abs,
Neg,
Iff,
Arrow,
Def,
Name(&'a str),
pub type Spanned<Tok, Loc, Error> = Result<(Loc, Tok, Loc), Error>;

#[derive(Debug)]
pub struct LexicalError(pub Range<usize>);

impl std::fmt::Display for LexicalError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "lexical error at {:?}", self.0)
}
}

// Notably absent from the above, but present in the below, are
// Whitespace, EOF, LexError
#[derive(Logos, Debug)]
enum _Token_ {
#[derive(Logos, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u16)]
pub enum LexToken {
// Unfortunately Logos's derive doesn't let us write explicit
// discriminants, like the commented-out `EOF = 0` below. I presume it
// might be ensuring that itself, and actively stopping people from
// doing e.g. EOF = 1 and messing up its internal indexing. I'm not
// certain yet whether this is something that can be relied on.
//
// EOF = 0,
#[end]
EOF,

@@ -79,11 +76,24 @@ enum _Token_ {

#[token = "("]
LParen,

#[token = ")"]
RParen,

// Name ↔ Name
#[regex = r"[a-zA-Z][_a-zA-Z0-9]*"]
// Ideally we would have:
// // Name ↔ Name
// #[regex = r"[a-zA-Z][_a-zA-Z0-9]*"]
// Name,
//
// as well as
//
// // FancyNameUnicode ↔ FancyNameAscii
// #[regex = r"[a-zA-Z ... \p{Greek} ...]"]
// FancyNameUnicode,
//
// But these regular expressions overlap, and it's ambiguous
// which one a purely ASCII string would match.
#[regex = r"[a-zA-Z\p{Greek}\x{1d49c}-\x{1d59f}\x{2100}-\x{214f}][_a-zA-Z0-9\x{207f}-\x{2089}\x{2090}-\x{209c}\x{1d62}-\x{1d6a}]*"]
Name,

// Since this uses Coptic letters for keywords, all Greek letters can be used as variable names.
@@ -113,8 +123,6 @@ enum _Token_ {
// FancyNameAscii ↔ FancyNameUnicode
#[regex = r"[\\][a-zA-Z][_a-zA-Z0-9]*"]
FancyNameAscii,
#[regex = r"[a-zA-Z\p{Greek}\x{1d49c}-\x{1d59f}\x{2100}-\x{214f}][_a-zA-Z0-9\x{207f}-\x{2089}\x{2090}-\x{209c}\x{1d62}-\x{1d6a}]*"]
FancyNameUnicode,

#[token = ":"]
Colon,
@@ -130,82 +138,7 @@ enum _Token_ {

#[error]
LexError,
}

impl<'a> std::fmt::Display for Token<'a> {
#[rustfmt::skip]
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Token::Dot => write!(f, "."),
Token::Abs => write!(f, "ⲗ"),
Token::Bot => write!(f, "⊥"),
Token::Def => write!(f, "≔"),
Token::Iff => write!(f, "↔"),
Token::Neg => write!(f, "¬"),
Token::Top => write!(f, "⊤"),
Token::Conj => write!(f, "∧"),
Token::Disj => write!(f, "∨"),
Token::Semi => write!(f, ";"),
Token::Arrow => write!(f, "→"),
Token::Colon => write!(f, ":"),
Token::LParen => write!(f, "("),
Token::RParen => write!(f, ")"),
Token::Name(s) => write!(f, "{}", s),
}
}
}

#[derive(Debug)]
pub struct LexicalError(pub Range<usize>);

impl std::fmt::Display for LexicalError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "lexical error at {:?}", self.0)
}
}

pub struct Tokens<'a>(logos::Lexer<_Token_, &'a str>);
pub type Spanned<Tok, Loc, Error> = Result<(Loc, Tok, Loc), Error>;

impl<'a> Tokens<'a> {
pub fn from_string(source: &'a str) -> Tokens<'a> {
Tokens(_Token_::lexer(source))
}
}

impl<'a> Iterator for Tokens<'a> {
type Item = Spanned<Token<'a>, usize, LexicalError>;

fn next(&mut self) -> Option<Self::Item> {
let lex = &mut self.0;
let range = lex.range();
let ok = |tok: Token<'a>| Ok((range.start, tok, range.end));
let token = loop {
match &lex.token {
_Token_::Whitespace | _Token_::Comment => lex.advance(),
_Token_::EOF => return None,
_Token_::LexError => break Err(LexicalError(range)),
_Token_::Name => break ok(Token::Name(lex.slice())),
_Token_::FancyNameAscii => break ok(Token::Name(lex.slice())),
_Token_::FancyNameUnicode => break ok(Token::Name(lex.slice())),
// And the rest are all unary members
_Token_::Dot => break ok(Token::Dot),
_Token_::Abs => break ok(Token::Abs),
_Token_::Bot => break ok(Token::Bot),
_Token_::Top => break ok(Token::Top),
_Token_::Neg => break ok(Token::Neg),
_Token_::Iff => break ok(Token::Iff),
_Token_::Def => break ok(Token::Def),
_Token_::Disj => break ok(Token::Disj),
_Token_::Conj => break ok(Token::Conj),
_Token_::Semi => break ok(Token::Semi),
_Token_::Arrow => break ok(Token::Arrow),
_Token_::Colon => break ok(Token::Colon),
_Token_::LParen => break ok(Token::LParen),
_Token_::RParen => break ok(Token::RParen),
}
};
lex.advance();
Some(token)
}
Binder,
Root,
}
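LexToken now derives Copy/Ord/Hash and is #[repr(u16)], which lines up with rowan's u16-based SyntaxKind; the discriminant-stability worry in the comment above matters precisely because of casts like the following. This glue is an assumption on my part, not shown in the diff:

// Assumed glue: each lexer token doubles as a rowan syntax kind.
impl From<LexToken> for rowan::SyntaxKind {
    fn from(t: LexToken) -> Self {
        rowan::SyntaxKind(t as u16) // valid: LexToken is fieldless and repr(u16)
    }
}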
35 changes: 32 additions & 3 deletions src/main.rs
@@ -2,8 +2,10 @@ mod ast;
mod codespan;
mod error;
mod lex;
mod rowan_token;
#[cfg(test)]
mod test_util;
mod token_wrap;

use codespan_reporting::term::termcolor::StandardStream;
use codespan_reporting::term::{self, ColorArg};
@@ -32,6 +34,14 @@ mod parser {
pub use prop::*;
}

mod rowan_parser {
// Hack to avoid clippy lints in generated code.
#![allow(clippy::all)]
use lalrpop_util::lalrpop_mod;
lalrpop_mod!(rowan_prop);
pub use rowan_prop::*;
}

fn print_errors<'a>(result: Result<(), Vec<(&'a str, Error<'a>)>>) -> Result<(), MainError> {
match result {
Ok(()) => Ok(()),
@@ -112,7 +122,12 @@ fn bad_unicode() -> () {
];

for s in invalid_source.iter() {
match parser::propParser::new().parse(lex::Tokens::from_string(s)) {
let tokens = token_wrap::Tokens::from_string(s);
let tokens = tokens.map(|x| {
println!("{:?}", x);
x
});
match parser::propParser::new().parse(tokens) {
Ok(_) => panic!(format!("accepted '{}'", s)),
Err(e) => println!("got an expected error: {:?}", e),
}
@@ -143,7 +158,7 @@ fn bad_ascii() -> Result<(), &'static str> {

let mut num_fail = 0;
for s in invalid_source.iter() {
let lexer = lex::Tokens::from_string(&s);
let lexer = token_wrap::Tokens::from_string(&s);
match parser::propParser::new().parse(lexer) {
Ok(_) => {
// bad
@@ -164,13 +179,27 @@ }
}
}

fn from_rowan<'a>(s: &'a str) -> Result<(), MainError> {
let tokens = rowan_token::Tokens::from_string(&s);
let mut builder = rowan::GreenNodeBuilder::new();
let parse_result = rowan_parser::propParser::new().parse(&mut builder, tokens);
match parse_result {
Err(e) => {
println!("{:?}", e);
Err(MainError::SomethingWentAwryAndStuffWasPrinted)
}
_ => Ok(()),
}
}

fn main() -> Result<(), MainError> {
let mut buf = std::io::BufReader::new(std::io::stdin());
let mut s = Box::new(String::new());

// Not really how I'd like this to be.
buf.read_to_string(&mut s)?;
let lexer = lex::Tokens::from_string(&s);
let lexer = token_wrap::Tokens::from_string(&s);
from_rowan(&s)?;
let parse_result = parser::propParser::new().parse(lexer);

match parse_result {
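Since from_rowan passes the builder as an extra argument to parse, the grammar's actions presumably drive it as reductions fire, leaving the finished tree in the builder rather than in the parser's return value. A sketch under that assumption — the helper and its behavior are hypothetical:

// Hypothetical follow-up to from_rowan; assumes the grammar's actions
// call start_node/token/finish_node and leave the builder balanced.
fn green_tree(src: &str) -> Result<rowan::GreenNode, String> {
    let tokens = rowan_token::Tokens::from_string(src);
    let mut builder = rowan::GreenNodeBuilder::new();
    rowan_parser::propParser::new()
        .parse(&mut builder, tokens)
        .map_err(|e| format!("{:?}", e))?;
    Ok(builder.finish())
}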
34 changes: 18 additions & 16 deletions src/prop.lalrpop
@@ -1,30 +1,32 @@
// Bah humbug the auto-generated sources have comments above this comment.
// #![allow(clippy::all)]
use crate::lex;
use crate::token_wrap;
use crate::ast::{Prop, Expr, Binding, Typ};
use std::rc::Rc;

grammar<'a>;

extern {
type Location = usize;
type Error = lex::LexicalError;

enum lex::Token<'a> {
"⊥" => lex::Token::Bot,
"." => lex::Token::Dot,
"≔" => lex::Token::Def,
"→" => lex::Token::Arrow,
"↔" => lex::Token::Iff,
"¬" => lex::Token::Neg,
"ⲗ" => lex::Token::Abs,
"∧" => lex::Token::Conj,
"∨" => lex::Token::Disj,
"⊤" => lex::Token::Top,
"(" => lex::Token::LParen,
")" => lex::Token::RParen,
":" => lex::Token::Colon,
";" => lex::Token::Semi,
name => lex::Token::Name(<&'a str>),
enum token_wrap::Token<'a> {
"⊥" => token_wrap::Token::Bot,
"." => token_wrap::Token::Dot,
"≔" => token_wrap::Token::Def,
"→" => token_wrap::Token::Arrow,
"↔" => token_wrap::Token::Iff,
"¬" => token_wrap::Token::Neg,
"ⲗ" => token_wrap::Token::Abs,
"∧" => token_wrap::Token::Conj,
"∨" => token_wrap::Token::Disj,
"⊤" => token_wrap::Token::Top,
"(" => token_wrap::Token::LParen,
")" => token_wrap::Token::RParen,
":" => token_wrap::Token::Colon,
";" => token_wrap::Token::Semi,
name => token_wrap::Token::Name(<&'a str>),
}
}
