Skip to content

Commit 981ac72

Browse files
Merge pull request #442 from grantlemons/typst-corrections
Typst Corrections
2 parents c9a2efd + fecaab3 commit 981ac72

File tree

4 files changed

+144
-13
lines changed

4 files changed

+144
-13
lines changed

harper-typst/src/lib.rs

Lines changed: 118 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ use typst_translator::TypstTranslator;
77
use harper_core::{Token, parsers::Parser};
88
use itertools::Itertools;
99
use typst_syntax::{
10-
Source,
11-
ast::{AstNode, Markup},
10+
Source, SyntaxNode,
11+
ast::{AstNode, Expr, Markup},
1212
};
1313

1414
/// A parser that wraps Harper's `PlainEnglish` parser allowing one to ingest Typst files.
@@ -25,14 +25,61 @@ impl Parser for Typst {
2525

2626
// Recurse through AST to create tokens
2727
let parse_helper = TypstTranslator::new(&typst_document);
28-
typst_tree
29-
.exprs()
28+
let mut buf = Vec::new();
29+
let exprs = typst_tree.exprs().collect_vec();
30+
let exprs = convert_parbreaks(&mut buf, &exprs);
31+
exprs
32+
.into_iter()
3033
.filter_map(|ex| parse_helper.parse_expr(ex, OffsetCursor::new(&typst_document)))
3134
.flatten()
3235
.collect_vec()
3336
}
3437
}
3538

39+
/// Replaces each `Space` expression that is directly adjacent to a heading or list item with a paragraph break.
40+
/// This is accomplished here instead of in the translating module because at this point the
41+
/// elements still carry the semantic information needed to recognise headings and list items.
42+
///
43+
/// Newlines are separate expressions in the parse tree (as the `Space` variant). `buf` is overwritten and owns the placeholder nodes that the returned paragraph breaks borrow.
44+
fn convert_parbreaks<'a>(buf: &'a mut Vec<SyntaxNode>, exprs: &'a [Expr]) -> Vec<Expr<'a>> {
45+
// Owned collection of placeholder nodes (one per input expression, synthesized with that expression's span) forcibly cast to paragraph breaks
46+
*buf = exprs
47+
.iter()
48+
.map(|e| {
49+
let mut node = SyntaxNode::placeholder(typst_syntax::SyntaxKind::Parbreak);
50+
node.synthesize(e.span());
51+
node
52+
})
53+
.collect_vec();
54+
55+
let should_parbreak = |e1, e2, e3| { // e1 = previous, e2 = current, e3 = next expression
56+
matches!(e2, Expr::Space(_))
57+
&& (matches!(e1, Expr::Heading(_) | Expr::List(_))
58+
|| matches!(e3, Expr::Heading(_) | Expr::List(_))) // NOTE(review): `Expr::Enum` items are not matched here — confirm that is intended
59+
};
60+
61+
let mut res: Vec<Expr> = Vec::new();
62+
let mut last_element: Option<Expr> = None;
63+
for ((i, expr), (_, next_expr)) in exprs.iter().enumerate().tuple_windows() { // pairs each expression with its successor
64+
let mut current_expr = *expr;
65+
if let Some(last_element) = last_element { // the first expression has no predecessor, so it is never converted
66+
if should_parbreak(last_element, *expr, *next_expr) {
67+
let pbreak = typst_syntax::ast::Parbreak::from_untyped(&buf[i]) // buf[i] is the placeholder built for exprs[i]
68+
.expect("Unable to convert expression to Parbreak");
69+
current_expr = Expr::Parbreak(pbreak);
70+
}
71+
}
72+
res.push(current_expr);
73+
last_element = Some(*expr) // remember the original (unconverted) expression as the next predecessor
74+
}
75+
// Push the final element unchanged because tuple_windows() above only ever yields it as `next_expr`
76+
if let Some(last) = exprs.iter().last() {
77+
res.push(*last);
78+
}
79+
80+
res
81+
}
82+
3683
#[cfg(test)]
3784
mod tests {
3885
use itertools::Itertools;
@@ -201,7 +248,7 @@ mod tests {
201248
&token_kinds.as_slice(),
202249
&[
203250
TokenKind::Word(_),
204-
TokenKind::Newline(1),
251+
TokenKind::ParagraphBreak,
205252
TokenKind::Word(_)
206253
]
207254
))
@@ -228,10 +275,10 @@ mod tests {
228275
}
229276

230277
#[test]
231-
fn label_unlintable() {
278+
fn label_ref_unlintable() {
232279
let source = "= Header
233280
<label>
234-
Paragraph";
281+
Paragraph @label";
235282

236283
let document = Document::new_curated(source, &Typst);
237284
let token_kinds = document.tokens().map(|t| t.kind).collect_vec();
@@ -241,10 +288,12 @@ mod tests {
241288
&token_kinds.as_slice(),
242289
&[
243290
TokenKind::Word(_),
244-
TokenKind::Newline(1),
291+
TokenKind::ParagraphBreak,
245292
TokenKind::Unlintable,
246-
TokenKind::Newline(1),
293+
TokenKind::Newline(_),
247294
TokenKind::Word(_),
295+
TokenKind::Space(_),
296+
TokenKind::Unlintable,
248297
]
249298
))
250299
}
@@ -309,4 +358,64 @@ mod tests {
309358
]
310359
));
311360
}
361+
362+
#[test]
363+
fn newline_in_paragraph() { // Single newlines inside a paragraph are expected to stay `Newline(1)` tokens, not `ParagraphBreak`
364+
let source = "Paragraph with
365+
newlines
366+
not paragraph breaks";
367+
368+
let document = Document::new_curated(source, &Typst);
369+
let token_kinds = document.tokens().map(|t| t.kind).collect_vec();
370+
dbg!(&token_kinds); // debug output of the produced token kinds
371+
372+
assert!(matches!(
373+
&token_kinds.as_slice(),
374+
&[
375+
TokenKind::Word(_), // Paragraph
376+
TokenKind::Space(_),
377+
TokenKind::Word(_), // with
378+
TokenKind::Newline(1), // line break after "with"
379+
TokenKind::Word(_), // newlines
380+
TokenKind::Newline(1), // line break after "newlines"
381+
TokenKind::Word(_), // not
382+
TokenKind::Space(_),
383+
TokenKind::Word(_), // paragraph
384+
TokenKind::Space(_),
385+
TokenKind::Word(_), // breaks
386+
]
387+
))
388+
}
389+
390+
#[test]
391+
fn parbreaks_in_list() { // The newlines before and between list items are expected to surface as `ParagraphBreak` tokens
392+
let source = "This is a list:
393+
- p1
394+
- p2
395+
- p3";
396+
397+
let document = Document::new_curated(source, &Typst);
398+
let token_kinds = document.tokens().map(|t| t.kind).collect_vec();
399+
dbg!(&token_kinds); // debug output of the produced token kinds
400+
401+
assert!(matches!(
402+
&token_kinds.as_slice(),
403+
&[
404+
TokenKind::Word(_), // This
405+
TokenKind::Space(_),
406+
TokenKind::Word(_), // is
407+
TokenKind::Space(_),
408+
TokenKind::Word(_), // a
409+
TokenKind::Space(_),
410+
TokenKind::Word(_), // list
411+
TokenKind::Punctuation(Punctuation::Colon),
412+
TokenKind::ParagraphBreak,
413+
TokenKind::Word(_), // p1
414+
TokenKind::ParagraphBreak,
415+
TokenKind::Word(_), // p2
416+
TokenKind::ParagraphBreak,
417+
TokenKind::Word(_) // p3
418+
]
419+
))
420+
}
312421
}

harper-typst/src/typst_translator.rs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,16 @@ impl<'a> TypstTranslator<'a> {
149149

150150
// Recurse on each element of an iterator
151151
let iter_recurse = |exprs: &mut dyn Iterator<Item = Expr>| {
152-
Some(exprs.filter_map(|e| recurse!(e)).flatten().collect_vec())
152+
let mut buf = Vec::new();
153+
let exprs = exprs.collect_vec();
154+
let exprs = super::convert_parbreaks(&mut buf, &exprs);
155+
Some(
156+
exprs
157+
.into_iter()
158+
.filter_map(|e| recurse!(e))
159+
.flatten()
160+
.collect_vec(),
161+
)
153162
};
154163

155164
// Parse the parameters of a function or closure
@@ -225,9 +234,6 @@ impl<'a> TypstTranslator<'a> {
225234
Expr::Strong(strong) => iter_recurse(&mut strong.body().exprs()),
226235
Expr::Emph(emph) => iter_recurse(&mut emph.body().exprs()),
227236
Expr::Link(a) => token!(a, TokenKind::Url),
228-
Expr::Ref(a) => {
229-
token!(a, TokenKind::Word(None))
230-
}
231237
Expr::Heading(heading) => iter_recurse(&mut heading.body().exprs()),
232238
Expr::List(list_item) => iter_recurse(&mut list_item.body().exprs()),
233239
Expr::Enum(enum_item) => iter_recurse(&mut enum_item.body().exprs()),

harper-typst/tests/run_tests.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,4 @@ macro_rules! create_test {
3737
create_test!(complex_document.typ, 0);
3838
create_test!(simplified_document.typ, 0);
3939
create_test!(complex_document_with_spelling_mistakes.typ, 4);
40+
// create_test!(issue_399.typ, 3);
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#problem[
2+
4. Find all the $x$ values where the following function is discontinuous.
3+
]
4+
5+
#solution[
6+
$x=-2,0,3$
7+
]
8+
9+
#aside[
10+
at $x=-2$ jump discontinuity.
11+
12+
at $x=0$ infinite discontinuity.
13+
14+
at $x=3$ removable discontinuity. (can be removed via re-defining the domain to exclude that)
15+
]

0 commit comments

Comments
 (0)