@@ -7,8 +7,8 @@ use typst_translator::TypstTranslator;
7
7
use harper_core:: { Token , parsers:: Parser } ;
8
8
use itertools:: Itertools ;
9
9
use typst_syntax:: {
10
- Source ,
11
- ast:: { AstNode , Markup } ,
10
+ Source , SyntaxNode ,
11
+ ast:: { AstNode , Expr , Markup } ,
12
12
} ;
13
13
14
14
/// A parser that wraps Harper's `PlainEnglish` parser allowing one to ingest Typst files.
@@ -25,14 +25,61 @@ impl Parser for Typst {
25
25
26
26
// Recurse through AST to create tokens
27
27
let parse_helper = TypstTranslator :: new ( & typst_document) ;
28
- typst_tree
29
- . exprs ( )
28
+ let mut buf = Vec :: new ( ) ;
29
+ let exprs = typst_tree. exprs ( ) . collect_vec ( ) ;
30
+ let exprs = convert_parbreaks ( & mut buf, & exprs) ;
31
+ exprs
32
+ . into_iter ( )
30
33
. filter_map ( |ex| parse_helper. parse_expr ( ex, OffsetCursor :: new ( & typst_document) ) )
31
34
. flatten ( )
32
35
. collect_vec ( )
33
36
}
34
37
}
35
38
39
+ /// Converts newlines after certain elements to paragraph breaks
40
+ /// This is accomplished here instead of in the translating module because at this point there is
41
+ /// still semantic information associated with the elements.
42
+ ///
43
+ /// Newlines are separate expressions in the parse tree (as the Space variant)
44
+ fn convert_parbreaks < ' a > ( buf : & ' a mut Vec < SyntaxNode > , exprs : & ' a [ Expr ] ) -> Vec < Expr < ' a > > {
45
+ // Owned collection of nodes forcibly casted to paragraph breaks
46
+ * buf = exprs
47
+ . iter ( )
48
+ . map ( |e| {
49
+ let mut node = SyntaxNode :: placeholder ( typst_syntax:: SyntaxKind :: Parbreak ) ;
50
+ node. synthesize ( e. span ( ) ) ;
51
+ node
52
+ } )
53
+ . collect_vec ( ) ;
54
+
55
+ let should_parbreak = |e1, e2, e3| {
56
+ matches ! ( e2, Expr :: Space ( _) )
57
+ && ( matches ! ( e1, Expr :: Heading ( _) | Expr :: List ( _) )
58
+ || matches ! ( e3, Expr :: Heading ( _) | Expr :: List ( _) ) )
59
+ } ;
60
+
61
+ let mut res: Vec < Expr > = Vec :: new ( ) ;
62
+ let mut last_element: Option < Expr > = None ;
63
+ for ( ( i, expr) , ( _, next_expr) ) in exprs. iter ( ) . enumerate ( ) . tuple_windows ( ) {
64
+ let mut current_expr = * expr;
65
+ if let Some ( last_element) = last_element {
66
+ if should_parbreak ( last_element, * expr, * next_expr) {
67
+ let pbreak = typst_syntax:: ast:: Parbreak :: from_untyped ( & buf[ i] )
68
+ . expect ( "Unable to convert expression to Parbreak" ) ;
69
+ current_expr = Expr :: Parbreak ( pbreak) ;
70
+ }
71
+ }
72
+ res. push ( current_expr) ;
73
+ last_element = Some ( * expr)
74
+ }
75
+ // Push last element because it will be excluded by tuple_windows() above
76
+ if let Some ( last) = exprs. iter ( ) . last ( ) {
77
+ res. push ( * last) ;
78
+ }
79
+
80
+ res
81
+ }
82
+
36
83
#[ cfg( test) ]
37
84
mod tests {
38
85
use itertools:: Itertools ;
@@ -201,7 +248,7 @@ mod tests {
201
248
& token_kinds. as_slice( ) ,
202
249
& [
203
250
TokenKind :: Word ( _) ,
204
- TokenKind :: Newline ( 1 ) ,
251
+ TokenKind :: ParagraphBreak ,
205
252
TokenKind :: Word ( _)
206
253
]
207
254
) )
@@ -228,10 +275,10 @@ mod tests {
228
275
}
229
276
230
277
#[ test]
231
- fn label_unlintable ( ) {
278
+ fn label_ref_unlintable ( ) {
232
279
let source = "= Header
233
280
<label>
234
- Paragraph" ;
281
+ Paragraph @label " ;
235
282
236
283
let document = Document :: new_curated ( source, & Typst ) ;
237
284
let token_kinds = document. tokens ( ) . map ( |t| t. kind ) . collect_vec ( ) ;
@@ -241,10 +288,12 @@ mod tests {
241
288
& token_kinds. as_slice( ) ,
242
289
& [
243
290
TokenKind :: Word ( _) ,
244
- TokenKind :: Newline ( 1 ) ,
291
+ TokenKind :: ParagraphBreak ,
245
292
TokenKind :: Unlintable ,
246
- TokenKind :: Newline ( 1 ) ,
293
+ TokenKind :: Newline ( _ ) ,
247
294
TokenKind :: Word ( _) ,
295
+ TokenKind :: Space ( _) ,
296
+ TokenKind :: Unlintable ,
248
297
]
249
298
) )
250
299
}
@@ -309,4 +358,64 @@ mod tests {
309
358
]
310
359
) ) ;
311
360
}
361
+
362
/// Single line breaks inside a plain paragraph must come out as `Newline(1)` tokens and must
/// not be promoted to paragraph breaks.
#[test]
fn newline_in_paragraph() {
    let source = "Paragraph with
newlines
not paragraph breaks";

    let document = Document::new_curated(source, &Typst);
    let token_kinds: Vec<_> = document.tokens().map(|token| token.kind).collect();
    dbg!(&token_kinds);

    assert!(matches!(
        token_kinds.as_slice(),
        [
            TokenKind::Word(_), // Paragraph
            TokenKind::Space(_),
            TokenKind::Word(_), // with
            TokenKind::Newline(1),
            TokenKind::Word(_), // newlines
            TokenKind::Newline(1),
            TokenKind::Word(_), // not
            TokenKind::Space(_),
            TokenKind::Word(_), // paragraph
            TokenKind::Space(_),
            TokenKind::Word(_), // breaks
        ]
    ));
}
389
+
390
/// The line breaks before a list and between its items must be reported as paragraph breaks.
#[test]
fn parbreaks_in_list() {
    let source = "This is a list:
- p1
- p2
- p3";

    let document = Document::new_curated(source, &Typst);
    let token_kinds: Vec<_> = document.tokens().map(|token| token.kind).collect();
    dbg!(&token_kinds);

    assert!(matches!(
        token_kinds.as_slice(),
        [
            TokenKind::Word(_), // This
            TokenKind::Space(_),
            TokenKind::Word(_), // is
            TokenKind::Space(_),
            TokenKind::Word(_), // a
            TokenKind::Space(_),
            TokenKind::Word(_), // list
            TokenKind::Punctuation(Punctuation::Colon),
            TokenKind::ParagraphBreak,
            TokenKind::Word(_), // p1
            TokenKind::ParagraphBreak,
            TokenKind::Word(_), // p2
            TokenKind::ParagraphBreak,
            TokenKind::Word(_), // p3
        ]
    ));
}
312
421
}
0 commit comments