@@ -301,7 +301,7 @@ def render_html(md: str):
def render_htmlV2(md: str):
    """Convert a markdown string to HTML.

    The text is tokenized first; the emitter then returns a 3-tuple whose
    last item is the rendered HTML, which is the only part handed back.
    """
    tokens = tokenize(md)
    _line, _col, rendered = emit_html(tokens)
    return rendered
306306
307307
@@ -311,22 +311,45 @@ def tokenize(md: str) -> list:
311311 i = 0
312312 tok = ''
313313 line = []
314+ html = False
314315 while i < len (md ):
315316 c = md [i ]
316- #print(f"{i:2} | {'\\n' if c == '\n' else c} | {tok} | {line}")
317- if c != '#' and not line and tok and tok == len (tok ) * '#' :
318- #
317+ #print(f"#{tok}# @{c}@")
318+ if c == '<' or html :
319+ #
320+ html = True
321+ tok += c
322+ if c == '>' and html :
323+ html = False
324+ elif c != '#' and not line and tok and tok == len (tok ) * '#' :
325+ # Isolate header level
319326 line .append (tok )
320327 tok = '' if c == ' ' else c
328+ elif c != ' ' and not line and tok and tok == len (tok ) * ' ' :
329+ # Isolate consecutive spaces at bol
330+ if len (tok ) >= 4 :
331+ line .append (tok )
332+ tok = c
321333 elif c in '-' and line and line [- 1 ][0 ] in '-' and line [- 1 ] == len (line [- 1 ]) * '-' :
322- #
334+ # Concatenate setext underline
323335 line [- 1 ] = line [- 1 ] + c
324336 elif c == '`' and line and not tok and line [- 1 ] in '``' :
325337 # Concatenate backquotes signs for fenced code
326338 line [- 1 ] = line [- 1 ] + c
327339 elif c in '*_' and line and line [- 1 ][- 1 ] in '*_' and tok == '' :
328340 # Concatenate signs for em & strong styles
329341 line [- 1 ] = line [- 1 ] + c
342+ elif c in '![' and checkLinkOrImage (md , i ):
343+ if c == '[' :
344+ t = 'lnk['
345+ else :
346+ t = 'img['
347+ i += 1
348+ if tok :
349+ line .append (tok )
350+ tok = ''
351+ line .append (t )
352+ line .append ('[' )
330353 elif c in '\n []()-+.>*_`' :
331354 if tok :
332355 line .append (tok )
@@ -347,94 +370,134 @@ def tokenize(md: str) -> list:
347370 res .append (line )
348371 line = []
349372 i += 1
373+ res .append (['' ])
350374 return res
351375
352376
353- def putLineInContext (stack : list , line : list ):
377+ def putLineInContext (stack : list , line : list , list_indent : int ):
354378 """."""
355379 j = 0
356380 k = 0
381+ list_indent = 4 if not list_indent else list_indent
357382 while j < len (line ) and k < len (stack ):
358383 if stack [k ][0 ] == 'u' :
359- j -= 1
360- elif stack [k ] == 'li' :
361- if line [j ] in '-*+' :
384+ if not line [j ]:
362385 break
363- elif stack [k ] == '>' :
386+ else :
387+ j -= 1
388+ elif stack [k ] == 'li' :
389+ if not line [j ] or line [j ] in '-*+' :
390+ if j > 0 and len (line [j - 1 ]) < len (stack ) / 2 * 4 :
391+ break
392+ else :
393+ j -= 1
394+ elif stack [k ] == 'blockquote' :
364395 if line [j ] != '>' :
365396 break
366397 elif stack [k ][0 ] == 'h' :
367398 break
399+ elif stack [k ] == 'fenced' :
400+ if line [j ] == '```' or line [j ] == '~~~' :
401+ j -= 1
402+ break
403+ elif stack [k ] == 'indented' :
404+ if line [j ] != ' ' :
405+ j -= 1
406+ break
407+ #elif stack[k] == 'a':
408+ # if line[j-1] == ')':
409+ # break
368410 elif stack [k ][0 ] == 'p' :
369411 if line [j ] in ['' , '#' , '##' ]:
370412 break
371413 else :
372414 break
373415 j += 1
374416 k += 1
375- bol = 0 if j >= len (line ) - 1 else j
417+ if j > len (line ) - 1 :
418+ bol = 0
419+ else :
420+ bol = j
376421 keepers = - 1 if k >= len (stack ) else k
377422 return (bol , keepers )
378423
379424
def detectNewBlock(tok, prev_tok, stack: list, list_indent: int):
    """Decide whether *tok* opens a new block-level node.

    Args:
        tok: Current token of the line being emitted.
        prev_tok: Token preceding *tok* (currently unused here).
        stack: Names of the nodes that are open, innermost last.
        list_indent: Indentation width of list items (currently unused here).

    Returns:
        A ``(node, offset)`` tuple. ``node`` is the name of the node to push
        on the stack (``'h1'``..., ``'fenced'``, ``'indented'``,
        ``'blockquote'``, ``'a'``, ``'img'``, ``'li'``, ``'ul'``, ``'p'``) or
        ``None`` when no block starts here. ``offset`` is how many tokens the
        caller must skip: 1 when the marker itself is consumed, 0 otherwise.
    """
    # Innermost open node, or None when nothing is open yet. Looking it up
    # once avoids the IndexError that ``stack[-1]`` raised on an empty stack
    # when a line started with a link or an image.
    top = stack[-1] if stack else None

    if tok and tok == len(tok) * '#':
        # A run of '#' gives the header level
        return f'h{len(tok)}', 1
    elif tok in ('```', '~~~'):
        return 'fenced', 1
    # NOTE(review): the literal below is restored as four spaces; the scraped
    # source collapsed whitespace -- confirm against the repository.
    elif tok == '    ':
        return 'indented', 1
    elif tok == '>':
        return 'blockquote', 1
    elif tok == 'lnk[' and top != 'a':
        # offset 0: the marker token is seen again once 'a' is open
        return 'a', 0
    elif tok == 'img[' and top != 'img':
        return 'img', 0
    elif stack and top[:2] == 'ul' and tok in '-*+':
        # NOTE: an empty token also matches here ('' in '-*+' is True)
        return 'li', 1
    elif (not stack or top[:2] != 'ul') and tok and tok in '*+-':
        return 'ul', 0
    elif not stack and tok:
        return 'p', 0
    else:
        return None, 0
447+
394448
def checkLinkOrImage(md: str, start: int):
    """Tell whether an inline link or image starts at ``md[start]``.

    A link has the shape ``[text](url)`` and an image ``![alt](url)``; the
    whole construct has to sit on one line.

    Args:
        md: Markdown source being tokenized.
        start: Index of the candidate ``[`` (link) or ``!`` (image).

    Returns:
        True when the text at *start* opens with ``[`` (optionally preceded
        by ``!``) and the rest of the line holds ``](`` followed later by
        ``)``. False otherwise, including when *start* is out of range.
    """
    if not 0 <= start < len(md):
        return False

    pos = start
    if md[pos] == '!':
        # A '!' only introduces an image when '[' follows immediately;
        # otherwise it is plain punctuation.
        pos += 1
    if md[pos:pos + 1] != '[':
        return False

    # Restrict the search to the current line
    eol = md.find('\n', pos)
    if eol == -1:
        eol = len(md)

    # The closing ')' must come after the '](' separator, not before it
    sep = md.find('](', pos, eol)
    return sep != -1 and md.find(')', sep + 2, eol) != -1
395467
def detectNewOrClosingSpan(tok: str, stack: list):
    """Classify *tok* as the start or the end of an inline span.

    Returns ``('opening', element)`` or ``('closing', element)`` when *tok*
    is one of the known span markers, ``(None, None)`` otherwise. Markers
    that are identical on both sides (backquote, underscores) close the span
    if its element is already somewhere in *stack* and open it if not.
    """
    # marker, element, end marker, parent elements (last column is not used yet)
    spans = (
        ('`', 'code', '`', ['p']),
        ('_', 'em', '_', ['em', 'p', 'obj']),  # or *
        ('__', 'strong', '__', ['p']),  # or **
        ('[', 'obj', ']', ['a']),
        ('(', 'url', ')', ['a']),
    )
    for start_mark, name, end_mark, _parents in spans:
        if tok == start_mark:
            if start_mark == end_mark and name in stack:
                return 'closing', name
            return 'opening', name
        if tok == end_mark:
            return 'closing', name
    return None, None
414489
415490
416- #######################################################################
417- # OLD CODE TO RECYCLE
418- #
419- # elif y == '\n' and (z == '```' or z == '~~~'):
420- # if i < len(toks) -1 and toks[i+1] != '\n':
421- # i, r = emit_html(toks, i+2, stack + ['fenced'])
422- # else:
423- # i, r = emit_html(toks, i+1, stack + ['fenced'])
424- # res += f'\n<pre><code>{r}</code></pre>'
425- # elif z == '[':
426- # i, r1 = emit_html(toks, i+1, stack + ['link_text'])
427- # i, r2 = emit_html(toks, i+1, stack + ['link_url'])
428- # res += f'<a href="{r2}">{r1}</a>'
429- # elif stack[-1][:1] == '>' and y == '\n' and z == ' ':
430- # i, r = emit_html(toks, i+1, stack + ['indented'])
431- # res += f'\n<pre><code>{r}</code></pre>'
432- ########################################################################
433-
434-
def html_text(element: str, content):
    """Wrap *content* in the HTML markup for *element*.

    Args:
        element: Node name popped from the emitter stack: either an HTML tag
            name (``p``, ``h1``, ``em``...) or one of the pseudo-elements
            ``fenced``, ``indented``, ``a`` and ``img``.
        content: Inner HTML as a string. For ``a`` and ``img`` it is a dict
            with the keys ``url``, ``obj`` (link text / alternative text)
            and optionally ``title``.

    Returns:
        The HTML fragment. Code blocks and elements whose name starts with
        ``p``, ``h`` or ``b`` are preceded by a newline.
    """
    if element in ['fenced', 'indented']:
        return f'\n<pre><code>{content}</code></pre>'

    if element in ['a', 'img']:
        title = f' title="{content["title"]}"' if 'title' in content else ''
        url = content.get('url', '')
        # 'obj' is missing when the bracketed part was never closed
        obj = content.get('obj', '')
        if element == 'img':
            # Images are void elements: the address goes in src, the text in
            # alt, and there is no closing tag.
            return f'<img src="{url}" alt="{obj}"{title}>'
        return f'<a href="{url}"{title}>{obj}</a>'

    is_block = '\n' if element[:1] in ['p', 'h', 'b'] else ''
    return f'{is_block}<{element}>{content}</{element}>'
439502
440503
@@ -445,14 +508,17 @@ def emit_html(toks: list, lstart = 2, tstart = 0):
445508 isLineCtx = True
446509 i = lstart
447510 j = tstart
511+ list_indent = 0
448512 while i < len (toks ) and j <= len (toks [i ]):
449- print (f'{ i } { j } | { isLineCtx :1} | { "." .join (stack ):15} | | { toks [i ]} ' )
450- print (accu [- 1 ])
513+ print (f'{ i :2} { j :2} | { isLineCtx :1} | { "." .join (stack ):20} | { str (accu [- 1 ])[:40 ].replace ('\n ' ,'.' )} ' )
451514 line = toks [i ]
452515 if not isLineCtx :
453- j , k = putLineInContext (stack , line )
516+ j , k = putLineInContext (stack , line , list_indent )
454517 if 0 <= k <= len (stack ) - 1 :
455- j = 0
518+ if j < 0 :
519+ i += 1
520+ else :
521+ j = 0
456522 elt = stack .pop ()
457523 last = accu .pop ()
458524 accu [- 1 ] += html_text (elt , last )
@@ -461,17 +527,21 @@ def emit_html(toks: list, lstart = 2, tstart = 0):
461527 tok = line [j ] if j < len (line ) else ''
462528 prev_tok = line [j - 1 ] if j > 0 else ''
463529 tst = None
464- node , ht , offset = detectNewBlock (tok , prev_tok , stack )
530+ node , offset = detectNewBlock (tok , prev_tok , stack , list_indent )
531+ list_indent = len (prev_tok ) if node == 'li' and not list_indent else list_indent
465532 if not node :
466533 tst = detectNewOrClosingSpan (tok , stack )
467534
468535 if node :
469536 isLineCtx = True
470537 j += offset
471538 stack += [node ]
472- accu += ['' ]
539+ if node in ['a' , 'img' ]:
540+ accu += [{'url' :'' }]
541+ else :
542+ accu += ['' ]
473543 elif tst and tst [0 ] == 'opening' :
474- isLineCtx = True
544+ # isLineCtx = True
475545 j += 1
476546 stack += [tst [1 ]]
477547 accu += ['' ]
@@ -480,10 +550,23 @@ def emit_html(toks: list, lstart = 2, tstart = 0):
480550 j += 1
481551 elt = stack .pop ()
482552 last = accu .pop ()
483- accu [- 1 ] += html_text (elt , last )
553+ if elt == 'obj' :
554+ accu [- 1 ][elt ] = last
555+ elif elt == 'url' :
556+ tmp = last .split ('"' )
557+ url = tmp [0 ]
558+ if len (tmp ) > 1 :
559+ accu [- 1 ]['title' ] = tmp [1 ]
560+ url = url [:- 1 ]
561+ accu [- 1 ]['url' ] += url
562+ else :
563+ accu [- 1 ] += html_text (elt , last )
564+ if elt in ['a' , 'img' ]:
565+ j -= 1
484566 continue
485567 elif stack :
486- accu [- 1 ] += tok
568+ if stack [- 1 ] not in ['a' , 'img' ]:
569+ accu [- 1 ] += tok
487570 j += 1
488571 else :
489572 j += 1
@@ -492,49 +575,7 @@ def emit_html(toks: list, lstart = 2, tstart = 0):
492575 i += 1
493576 j = 0
494577 isLineCtx = False
495- return i , j , accu [0 ]
496-
497578
498- def emit_html_recursive (toks : list , lstart = 2 , tstart = 0 , stack = [], isLineCtx = True ):
499- """."""
500- res = ''
501- i = lstart
502- j = tstart
503- while i < len (toks ) and j <= len (toks [i ]):
504- print (f'{ i } { j } | { isLineCtx :1} | { "." .join (stack ):15} | { toks [i ]} ' )
505- line = toks [i ]
506- if not isLineCtx :
507- j , k = putLineInContext (stack , line )
508- if 0 <= k <= len (stack ) - 1 :
509- #print(f"back! k={k}")
510- return i , 0 , res
511-
512- tok = line [j ] if j < len (line ) else ''
513- prev_tok = line [j - 1 ] if j > 0 else ''
514- tst = None
515- node , ht , offset = detectNewBlock (tok , prev_tok , stack )
516- if not node :
517- tst = detectNewOrClosingSpan (tok , stack )
518-
519- if node :
520- i , j , r = emit_html_recursive (toks , i , j + offset , stack + [node ])
521- res += html_text (ht , r )
522- elif tst and tst [0 ] == 'opening' :
523- i , j , r = emit_html_recursive (toks , i , j + 1 , stack + [tst [1 ]])
524- res += html_text (tst [1 ], r )
525- elif tst and tst [0 ] == 'closing' :
526- return i , j + 1 , res
527- elif stack :
528- res += tok
529- j += 1
530- else :
531- j += 1
579+ return i , j , accu [0 ]
532580
533- if j >= len (line ):
534- i += 1
535- j = 0
536- if j == 0 :
537- isLineCtx = False
538- print (f'ret { i } { j } ' )
539- return i , j , res
540581
0 commit comments