Skip to content

Commit

Permalink
Merge pull request #442 from grantlemons/typst-corrections
Browse files Browse the repository at this point in the history
Typst Corrections
  • Loading branch information
elijah-potter authored Mar 6, 2025
2 parents c9a2efd + fecaab3 commit 981ac72
Show file tree
Hide file tree
Showing 4 changed files with 144 additions and 13 deletions.
127 changes: 118 additions & 9 deletions harper-typst/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ use typst_translator::TypstTranslator;
use harper_core::{Token, parsers::Parser};
use itertools::Itertools;
use typst_syntax::{
Source,
ast::{AstNode, Markup},
Source, SyntaxNode,
ast::{AstNode, Expr, Markup},
};

/// A parser that wraps Harper's `PlainEnglish` parser allowing one to ingest Typst files.
Expand All @@ -25,14 +25,61 @@ impl Parser for Typst {

// Recurse through AST to create tokens
let parse_helper = TypstTranslator::new(&typst_document);
typst_tree
.exprs()
let mut buf = Vec::new();
let exprs = typst_tree.exprs().collect_vec();
let exprs = convert_parbreaks(&mut buf, &exprs);
exprs
.into_iter()
.filter_map(|ex| parse_helper.parse_expr(ex, OffsetCursor::new(&typst_document)))
.flatten()
.collect_vec()
}
}

/// Converts newlines next to headings and list items into paragraph breaks.
///
/// This is done here instead of in the translating module because at this point there is
/// still semantic information associated with the elements.
///
/// Newlines are separate expressions in the parse tree (as the `Space` variant). An
/// `Expr::Space` is replaced by an `Expr::Parbreak` when the expression directly before it or
/// directly after it is an `Expr::Heading` or an `Expr::List`. The first and the last
/// expression of `exprs` are never replaced, because they lack one of the two neighbours.
///
/// `buf` is overwritten with one synthesized paragraph-break node per input expression. It
/// is supplied by the caller because the returned expressions borrow from it, so it has to
/// outlive them.
///
/// Returns a vector of the same length and order as `exprs`.
///
/// # Panics
///
/// Panics if a synthesized node cannot be viewed as a `Parbreak`.
fn convert_parbreaks<'a>(buf: &'a mut Vec<SyntaxNode>, exprs: &'a [Expr]) -> Vec<Expr<'a>> {
    // Owned collection of nodes forcibly cast to paragraph breaks. `buf[i]` carries the span
    // of `exprs[i]`, so a replacement keeps the span of the expression it stands in for.
    *buf = exprs
        .iter()
        .map(|e| {
            let mut node = SyntaxNode::placeholder(typst_syntax::SyntaxKind::Parbreak);
            node.synthesize(e.span());
            node
        })
        .collect();

    let mut res: Vec<Expr<'a>> = Vec::with_capacity(exprs.len());
    for (i, expr) in exprs.iter().enumerate() {
        // Both neighbours must exist for an expression to be eligible; this leaves the first
        // and the last expression untouched.
        let neighbours = i
            .checked_sub(1)
            .and_then(|prev| exprs.get(prev))
            .zip(exprs.get(i + 1));

        let converted = match neighbours {
            Some((prev, next))
                if matches!(expr, Expr::Space(_))
                    && (matches!(prev, Expr::Heading(_) | Expr::List(_))
                        || matches!(next, Expr::Heading(_) | Expr::List(_))) =>
            {
                let pbreak = typst_syntax::ast::Parbreak::from_untyped(&buf[i])
                    .expect("Unable to convert expression to Parbreak");
                Expr::Parbreak(pbreak)
            }
            _ => *expr,
        };
        res.push(converted);
    }

    res
}

#[cfg(test)]
mod tests {
use itertools::Itertools;
Expand Down Expand Up @@ -201,7 +248,7 @@ mod tests {
&token_kinds.as_slice(),
&[
TokenKind::Word(_),
TokenKind::Newline(1),
TokenKind::ParagraphBreak,
TokenKind::Word(_)
]
))
Expand All @@ -228,10 +275,10 @@ mod tests {
}

#[test]
fn label_unlintable() {
fn label_ref_unlintable() {
let source = "= Header
<label>
Paragraph";
Paragraph @label";

let document = Document::new_curated(source, &Typst);
let token_kinds = document.tokens().map(|t| t.kind).collect_vec();
Expand All @@ -241,10 +288,12 @@ mod tests {
&token_kinds.as_slice(),
&[
TokenKind::Word(_),
TokenKind::Newline(1),
TokenKind::ParagraphBreak,
TokenKind::Unlintable,
TokenKind::Newline(1),
TokenKind::Newline(_),
TokenKind::Word(_),
TokenKind::Space(_),
TokenKind::Unlintable,
]
))
}
Expand Down Expand Up @@ -309,4 +358,64 @@ mod tests {
]
));
}

#[test]
fn newline_in_paragraph() {
    // Single line breaks inside a paragraph are expected to stay `Newline(1)` tokens rather
    // than being promoted to paragraph breaks.
    let source = "Paragraph with
newlines
not paragraph breaks";

    let document = Document::new_curated(source, &Typst);
    let token_kinds: Vec<_> = document.tokens().map(|token| token.kind).collect();
    dbg!(&token_kinds);

    assert!(matches!(
        token_kinds.as_slice(),
        [
            TokenKind::Word(_), // Paragraph
            TokenKind::Space(_),
            TokenKind::Word(_), // with
            TokenKind::Newline(1),
            TokenKind::Word(_), // newlines
            TokenKind::Newline(1),
            TokenKind::Word(_), // not
            TokenKind::Space(_),
            TokenKind::Word(_), // paragraph
            TokenKind::Space(_),
            TokenKind::Word(_), // breaks
        ]
    ))
}

#[test]
fn parbreaks_in_list() {
    // The newline before each list item is expected to surface as a `ParagraphBreak`, so
    // every item is separated from the text (or item) preceding it.
    let source = "This is a list:
- p1
- p2
- p3";

    let document = Document::new_curated(source, &Typst);
    let token_kinds: Vec<_> = document.tokens().map(|token| token.kind).collect();
    dbg!(&token_kinds);

    assert!(matches!(
        token_kinds.as_slice(),
        [
            TokenKind::Word(_), // This
            TokenKind::Space(_),
            TokenKind::Word(_), // is
            TokenKind::Space(_),
            TokenKind::Word(_), // a
            TokenKind::Space(_),
            TokenKind::Word(_), // list
            TokenKind::Punctuation(Punctuation::Colon),
            TokenKind::ParagraphBreak,
            TokenKind::Word(_), // p1
            TokenKind::ParagraphBreak,
            TokenKind::Word(_), // p2
            TokenKind::ParagraphBreak,
            TokenKind::Word(_), // p3
        ]
    ))
}
}
14 changes: 10 additions & 4 deletions harper-typst/src/typst_translator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,16 @@ impl<'a> TypstTranslator<'a> {

// Recurse on each element of an iterator
let iter_recurse = |exprs: &mut dyn Iterator<Item = Expr>| {
Some(exprs.filter_map(|e| recurse!(e)).flatten().collect_vec())
let mut buf = Vec::new();
let exprs = exprs.collect_vec();
let exprs = super::convert_parbreaks(&mut buf, &exprs);
Some(
exprs
.into_iter()
.filter_map(|e| recurse!(e))
.flatten()
.collect_vec(),
)
};

// Parse the parameters of a function or closure
Expand Down Expand Up @@ -225,9 +234,6 @@ impl<'a> TypstTranslator<'a> {
Expr::Strong(strong) => iter_recurse(&mut strong.body().exprs()),
Expr::Emph(emph) => iter_recurse(&mut emph.body().exprs()),
Expr::Link(a) => token!(a, TokenKind::Url),
Expr::Ref(a) => {
token!(a, TokenKind::Word(None))
}
Expr::Heading(heading) => iter_recurse(&mut heading.body().exprs()),
Expr::List(list_item) => iter_recurse(&mut list_item.body().exprs()),
Expr::Enum(enum_item) => iter_recurse(&mut enum_item.body().exprs()),
Expand Down
1 change: 1 addition & 0 deletions harper-typst/tests/run_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,4 @@ macro_rules! create_test {
create_test!(complex_document.typ, 0);
create_test!(simplified_document.typ, 0);
create_test!(complex_document_with_spelling_mistakes.typ, 4);
// create_test!(issue_399.typ, 3);
15 changes: 15 additions & 0 deletions harper-typst/tests/test_sources/issue_399.typ
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#problem[
4. Find all the $x$ values where the following function is discontinuous.
]

#solution[
$x=-2,0,3$
]

#aside[
at $x=-2$ jump discontinuity.

at $x=0$ infinite discontinuity.

at $x=3$ removable discontinuity. (can be removed via re-defining the domain to exclude that)
]

0 comments on commit 981ac72

Please sign in to comment.