Having a look at integrating rowan/logos/lalrpop #5

Open · wants to merge 7 commits into master
1 change: 1 addition & 0 deletions Cargo.toml
@@ -14,6 +14,7 @@ regex = "1"
logos-derive = "0.9.7"
codespan-reporting = "0.9"
structopt = "0.3.12"
rowan = "0.9"

[features]
# So this flag allows you to add the #[ignore] attribute at compile time to known failing tests.
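The new rowan dependency supplies the lossless "green tree" layer the PR title mentions. A minimal sketch of how such a tree is built, assuming the ~0.9-era API (SyntaxKind as a plain u16 newtype, token text convertible from &str) — names here are assumptions, not code from this PR:

// Sketch only: rowan ~0.9 API; kinds and the helper are hypothetical.
use rowan::{GreenNodeBuilder, SyntaxKind};

fn example_tree() -> rowan::GreenNode {
    let mut b = GreenNodeBuilder::new();
    b.start_node(SyntaxKind(0));        // hypothetical Root kind
    b.token(SyntaxKind(1), "x".into()); // hypothetical Name leaf
    b.finish_node();
    b.finish()
}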
3 changes: 2 additions & 1 deletion src/error.rs
@@ -1,5 +1,6 @@
use crate::lex;
pub type Error<'a> = lalrpop_util::ParseError<usize, lex::Token<'a>, lex::LexicalError>;
use crate::token_wrap;
pub type Error<'a> = lalrpop_util::ParseError<usize, token_wrap::Token<'a>, lex::LexicalError>;

#[derive(Debug)]
pub enum MainError {
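The widened alias still resolves to lalrpop_util::ParseError, so error handling stays variant-based. A hedged sketch of consuming it — the helper is hypothetical, and field sets on some variants vary across lalrpop versions, hence the `..` and catch-all:

// Hypothetical helper, not part of this diff.
fn describe(err: &crate::error::Error<'_>) -> String {
    use lalrpop_util::ParseError::*;
    match err {
        InvalidToken { location } => format!("invalid token at byte {}", location),
        UnrecognizedToken { .. } => "unrecognized token".to_string(),
        User { error } => format!("{}", error),
        _ => "parse error".to_string(), // ExtraToken and friends
    }
}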
137 changes: 35 additions & 102 deletions src/lex.rs
@@ -1,29 +1,26 @@
use logos::Logos;
use std::ops::Range;

#[derive(Debug, Clone)]
pub enum Token<'a> {
Dot,
Semi,
Colon,
LParen,
RParen,
Bot,
Top,
Disj,
Conj,
Abs,
Neg,
Iff,
Arrow,
Def,
Name(&'a str),
pub type Spanned<Tok, Loc, Error> = Result<(Loc, Tok, Loc), Error>;

#[derive(Debug)]
pub struct LexicalError(pub Range<usize>);

impl std::fmt::Display for LexicalError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "lexical error at {:?}", self.0)
}
}

// Notably absent from the above, but present in the below, are
// Whitespace, EOF, LexError
#[derive(Logos, Debug)]
enum _Token_ {
#[derive(Logos, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u16)]
pub enum LexToken {
// Unfortunately Logos's derive doesn't let us write explicit
// discriminants, like the commented-out `EOF = 0` below. I presume it
// might be ensuring that itself, and actively stopping people from
// doing e.g. EOF = 1 and messing up its internal indexing. I'm not
// certain yet whether this is something that can be relied on.
//
// EOF = 0,
#[end]
EOF,

@@ -79,11 +76,24 @@ enum _Token_ {

#[token = "("]
LParen,

#[token = ")"]
RParen,

// Name ↔ Name
#[regex = r"[a-zA-Z][_a-zA-Z0-9]*"]
// Ideally we would have:
// // Name ↔ Name
// #[regex = r"[a-zA-Z][_a-zA-Z0-9]*"]
// Name,
//
// as well as
//
// // FancyNameUnicode ↔ FancyNameAscii
// #[regex = r"[a-zA-Z ... \p{Greek} ...]"]
// FancyNameUnicode,
//
// But these regular expressions overlap, and it's ambiguous
// which one a purely ASCII string would match.
#[regex = r"[a-zA-Z\p{Greek}\x{1d49c}-\x{1d59f}\x{2100}-\x{214f}][_a-zA-Z0-9\x{207f}-\x{2089}\x{2090}-\x{209c}\x{1d62}-\x{1d6a}]*"]
Name,

// Since this uses Coptic letters for keywords, all Greek letters can be used as variable names.
@@ -113,8 +123,6 @@ enum _Token_ {
// FancyNameAscii ↔ FancyNameUnicode
#[regex = r"[\\][a-zA-Z][_a-zA-Z0-9]*"]
FancyNameAscii,
#[regex = r"[a-zA-Z\p{Greek}\x{1d49c}-\x{1d59f}\x{2100}-\x{214f}][_a-zA-Z0-9\x{207f}-\x{2089}\x{2090}-\x{209c}\x{1d62}-\x{1d6a}]*"]
FancyNameUnicode,

#[token = ":"]
Colon,
@@ -130,82 +138,7 @@ enum _Token_ {

#[error]
LexError,
}

impl<'a> std::fmt::Display for Token<'a> {
#[rustfmt::skip]
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Token::Dot => write!(f, "."),
Token::Abs => write!(f, "ⲗ"),
Token::Bot => write!(f, "⊥"),
Token::Def => write!(f, "≔"),
Token::Iff => write!(f, "↔"),
Token::Neg => write!(f, "¬"),
Token::Top => write!(f, "⊤"),
Token::Conj => write!(f, "∧"),
Token::Disj => write!(f, "∨"),
Token::Semi => write!(f, ";"),
Token::Arrow => write!(f, "→"),
Token::Colon => write!(f, ":"),
Token::LParen => write!(f, "("),
Token::RParen => write!(f, ")"),
Token::Name(s) => write!(f, "{}", s),
}
}
}

#[derive(Debug)]
pub struct LexicalError(pub Range<usize>);

impl std::fmt::Display for LexicalError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "lexical error at {:?}", self.0)
}
}

pub struct Tokens<'a>(logos::Lexer<_Token_, &'a str>);
pub type Spanned<Tok, Loc, Error> = Result<(Loc, Tok, Loc), Error>;

impl<'a> Tokens<'a> {
pub fn from_string(source: &'a str) -> Tokens<'a> {
Tokens(_Token_::lexer(source))
}
}

impl<'a> Iterator for Tokens<'a> {
type Item = Spanned<Token<'a>, usize, LexicalError>;

fn next(&mut self) -> Option<Self::Item> {
let lex = &mut self.0;
let range = lex.range();
let ok = |tok: Token<'a>| Ok((range.start, tok, range.end));
let token = loop {
match &lex.token {
_Token_::Whitespace | _Token_::Comment => lex.advance(),
_Token_::EOF => return None,
_Token_::LexError => break Err(LexicalError(range)),
_Token_::Name => break ok(Token::Name(lex.slice())),
_Token_::FancyNameAscii => break ok(Token::Name(lex.slice())),
_Token_::FancyNameUnicode => break ok(Token::Name(lex.slice())),
// And the rest are all unary members
_Token_::Dot => break ok(Token::Dot),
_Token_::Abs => break ok(Token::Abs),
_Token_::Bot => break ok(Token::Bot),
_Token_::Top => break ok(Token::Top),
_Token_::Neg => break ok(Token::Neg),
_Token_::Iff => break ok(Token::Iff),
_Token_::Def => break ok(Token::Def),
_Token_::Disj => break ok(Token::Disj),
_Token_::Conj => break ok(Token::Conj),
_Token_::Semi => break ok(Token::Semi),
_Token_::Arrow => break ok(Token::Arrow),
_Token_::Colon => break ok(Token::Colon),
_Token_::LParen => break ok(Token::LParen),
_Token_::RParen => break ok(Token::RParen),
}
};
lex.advance();
Some(token)
}
Binder,
Root,
}
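LexToken now derives Copy/Ord/Hash and is #[repr(u16)], which lines up with rowan's u16-based SyntaxKind; the discriminant-stability worry in the comment above matters precisely because of casts like the following. This glue is an assumption on my part, not shown in the diff:

// Assumed glue: each lexer token doubles as a rowan syntax kind.
impl From<LexToken> for rowan::SyntaxKind {
    fn from(t: LexToken) -> Self {
        rowan::SyntaxKind(t as u16) // valid: LexToken is fieldless and repr(u16)
    }
}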
35 changes: 32 additions & 3 deletions src/main.rs
@@ -2,8 +2,10 @@ mod ast;
mod codespan;
mod error;
mod lex;
mod rowan_token;
#[cfg(test)]
mod test_util;
mod token_wrap;

use codespan_reporting::term::termcolor::StandardStream;
use codespan_reporting::term::{self, ColorArg};
@@ -32,6 +34,14 @@ mod parser {
pub use prop::*;
}

mod rowan_parser {
// Hack to avoid clippy lints in generated code.
#![allow(clippy::all)]
use lalrpop_util::lalrpop_mod;
lalrpop_mod!(rowan_prop);
pub use rowan_prop::*;
}

fn print_errors<'a>(result: Result<(), Vec<(&'a str, Error<'a>)>>) -> Result<(), MainError> {
match result {
Ok(()) => Ok(()),
@@ -112,7 +122,12 @@ fn bad_unicode() -> () {
];

for s in invalid_source.iter() {
match parser::propParser::new().parse(lex::Tokens::from_string(s)) {
let tokens = token_wrap::Tokens::from_string(s);
let tokens = tokens.map(|x| {
println!("{:?}", x);
x
});
match parser::propParser::new().parse(tokens) {
Ok(_) => panic!(format!("accepted '{}'", s)),
Err(e) => println!("got an expected error: {:?}", e),
}
@@ -143,7 +158,7 @@ fn bad_ascii() -> Result<(), &'static str> {

let mut num_fail = 0;
for s in invalid_source.iter() {
let lexer = lex::Tokens::from_string(&s);
let lexer = token_wrap::Tokens::from_string(&s);
match parser::propParser::new().parse(lexer) {
Ok(_) => {
// bad
@@ -164,13 +179,27 @@ }
}
}

fn from_rowan<'a>(s: &'a str) -> Result<(), MainError> {
let tokens = rowan_token::Tokens::from_string(&s);
let mut builder = rowan::GreenNodeBuilder::new();
let parse_result = rowan_parser::propParser::new().parse(&mut builder, tokens);
match parse_result {
Err(e) => {
println!("{:?}", e);
Err(MainError::SomethingWentAwryAndStuffWasPrinted)
}
_ => Ok(()),
}
}

fn main() -> Result<(), MainError> {
let mut buf = std::io::BufReader::new(std::io::stdin());
let mut s = Box::new(String::new());

// Not really how I'd like this to be.
buf.read_to_string(&mut s)?;
let lexer = lex::Tokens::from_string(&s);
let lexer = token_wrap::Tokens::from_string(&s);
from_rowan(&s)?;
let parse_result = parser::propParser::new().parse(lexer);

match parse_result {
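Since from_rowan passes the builder as an extra argument to parse, the grammar's actions presumably drive it as reductions fire, leaving the finished tree in the builder rather than in the parser's return value. A sketch under that assumption — the helper and its behavior are hypothetical:

// Hypothetical follow-up to from_rowan; assumes the grammar's actions
// call start_node/token/finish_node and leave the builder balanced.
fn green_tree(src: &str) -> Result<rowan::GreenNode, String> {
    let tokens = rowan_token::Tokens::from_string(src);
    let mut builder = rowan::GreenNodeBuilder::new();
    rowan_parser::propParser::new()
        .parse(&mut builder, tokens)
        .map_err(|e| format!("{:?}", e))?;
    Ok(builder.finish())
}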
34 changes: 18 additions & 16 deletions src/prop.lalrpop
@@ -1,30 +1,32 @@
// Bah humbug the auto-generated sources have comments above this comment.
// #![allow(clippy::all)]
use crate::lex;
use crate::token_wrap;
use crate::ast::{Prop, Expr, Binding, Typ};
use std::rc::Rc;

grammar<'a>;

extern {
type Location = usize;
type Error = lex::LexicalError;

enum lex::Token<'a> {
"⊥" => lex::Token::Bot,
"." => lex::Token::Dot,
"≔" => lex::Token::Def,
"→" => lex::Token::Arrow,
"↔" => lex::Token::Iff,
"¬" => lex::Token::Neg,
"ⲗ" => lex::Token::Abs,
"∧" => lex::Token::Conj,
"∨" => lex::Token::Disj,
"⊤" => lex::Token::Top,
"(" => lex::Token::LParen,
")" => lex::Token::RParen,
":" => lex::Token::Colon,
";" => lex::Token::Semi,
name => lex::Token::Name(<&'a str>),
enum token_wrap::Token<'a> {
"⊥" => token_wrap::Token::Bot,
"." => token_wrap::Token::Dot,
"≔" => token_wrap::Token::Def,
"→" => token_wrap::Token::Arrow,
"↔" => token_wrap::Token::Iff,
"¬" => token_wrap::Token::Neg,
"ⲗ" => token_wrap::Token::Abs,
"∧" => token_wrap::Token::Conj,
"∨" => token_wrap::Token::Disj,
"⊤" => token_wrap::Token::Top,
"(" => token_wrap::Token::LParen,
")" => token_wrap::Token::RParen,
":" => token_wrap::Token::Colon,
";" => token_wrap::Token::Semi,
name => token_wrap::Token::Name(<&'a str>),
}
}
