77import os
88import glob
99import yaml
10+ import re
11+ import subprocess
12+ import tempfile
1013
# Output directory for today's post, e.g. "src/content/blog/2024-01-31".
# The date is taken from the local clock at import time.
path_to = f'src/content/blog/{datetime.datetime.now().strftime("%Y-%m-%d")}'
1215
@@ -84,35 +87,185 @@ def outline(topic):
def write_from_outline(outline):
    """Ask the reasoning model to write the full article for *outline*.

    Args:
        outline: The outline text; it is embedded verbatim at the top of the
            user prompt.

    Returns:
        Whatever ``generate`` returns for the two-message conversation
        (system prompt with typography rules, user prompt with the outline
        and the writing/Markdown/LaTeX constraints).
    """
    # ``deepseek`` is only read here, so no ``global`` declaration is needed.
    return generate([
        {"role": "system", "content": "你是一位专业技术博客作者。在写作时请遵循以下中文排版规范:使用全角中文标点;专有名词大小写正确;英文、数字使用半角字符;使用直角引号「」。"},
        {"role": "user", "content": f"{outline}\n\n根据这个提纲中关于技术知识的部分,写出一篇技术博客文章。文章中避免出现图片,不能使用任何列表。每一段出现的代码都进行较为详细的解读。在讲述内容时尽量使用段落的语言,语言风格可以略偏专业,但保持清晰。使用Markdown(要求符合Common Markdown规范)输出,使用LaTeX公式(注意:数学的开闭定界符前后不能有字母或数字字符。像x$a + b = c$或$a + b = c$1将无法渲染为数学公式(所有$会被渲染为$);但x $\\infty$ 1和($\\infty$)会正常渲染),标题尽量只用一级标题 `#` 和二级标题 `##`,不要用分割线。请遵循中文排版规范,使用正确的标点符号。直接输出正文。"}
    ], deepseek, "deepseek-reasoner")
9093
def summary(article):
    """Ask the chat model for a short (about 15 characters) blurb for *article*.

    Args:
        article: The article text; it is appended verbatim to the user prompt.

    Returns:
        Whatever ``generate`` returns for the two-message conversation. The
        system prompt asks for a subtitle-style blurb without a trailing
        full stop.
    """
    # ``deepseek`` is only read here, so no ``global`` declaration is needed.
    return generate([
        {"role": "system", "content": "你是一个技术博客简介写作者,简介不一定需要涵盖文章的全部内容,能起到一定的提示作用即可。直接输出简介。遵循以下中文排版规范:使用全角中文标点;专有名词大小写正确;英文、数字使用半角字符。注意简介被作为副标题使用,不是一句句子,不要以句号结尾。"},
        {"role": "user", "content": f"给这篇文章写一个15字的简短介绍:\n\n{article}"}
    ], deepseek, "deepseek-chat")
97100
101+ # LaTeX error handling
def remove_latex_comments(latex_str: str) -> str:
    """Strip LaTeX ``%`` comments from every line of *latex_str*.

    A ``%`` starts a comment unless it is escaped, i.e. preceded by an odd
    number of backslashes (``\\%`` in LaTeX source is a literal percent sign,
    while a double backslash followed by ``%`` is a line break followed by a
    comment).

    Args:
        latex_str: LaTeX source, possibly spanning several lines.

    Returns:
        The source with each line truncated at its first unescaped ``%``.
        Lines are re-joined with ``"\\n"``; a trailing newline of the input
        is not preserved.
    """
    # Not preceded by a backslash, then any number of backslash *pairs*,
    # then the percent sign: this leaves an even number of backslashes in
    # front of ``%``, which means the percent sign itself is not escaped.
    comment_start = re.compile(r'(?<!\\)(?:\\\\)*(%)')

    cleaned_lines = []
    for line in latex_str.splitlines():
        m = comment_start.search(line)
        if m:
            # Cut at the percent sign itself, keeping the backslash pairs.
            line = line[:m.start(1)]
        cleaned_lines.append(line)
    return "\n".join(cleaned_lines)
111+
def check_balanced_braces(latex_str: str) -> "tuple[bool, list]":
    """Check that grouping braces ``{`` / ``}`` in *latex_str* are balanced.

    Characters escaped with a backslash are skipped, so ``\\{`` and ``\\}``
    (literal braces in LaTeX) do not count, while a brace following an
    escaped backslash (``\\\\{``) does.

    Args:
        latex_str: LaTeX source to scan.

    Returns:
        ``(ok, errors)`` where ``ok`` is True when no problem was found and
        ``errors`` is a list of messages. Unmatched closing braces are
        reported first, in scan order, followed by unmatched opening braces.
        Positions are 0-based character offsets into *latex_str*.
    """
    open_positions = []
    errors = []
    escaped = False
    for index, char in enumerate(latex_str):
        if escaped:
            # This character is consumed by the preceding backslash.
            escaped = False
            continue
        if char == '\\':
            escaped = True
        elif char == '{':
            open_positions.append(index)
        elif char == '}':
            if open_positions:
                open_positions.pop()
            else:
                errors.append(f"位置 {index}: 右大括号 '}}' 没有对应的左大括号")
    # Whatever is still on the stack never got a closing brace.
    for pos in open_positions:
        errors.append(f"位置 {pos}: 左大括号 '{{' 没有对应的右大括号")
    return (len(errors) == 0), errors
127+
def check_environment_matching(latex_str: str) -> "tuple[bool, list]":
    """Check that ``\\begin{env}`` / ``\\end{env}`` pairs nest correctly.

    Args:
        latex_str: LaTeX source to scan.

    Returns:
        ``(ok, errors)`` where ``ok`` is True when no problem was found and
        ``errors`` is a list of messages. Three situations are reported: an
        ``\\end`` with no open environment, an ``\\end`` whose name differs
        from the most recently opened environment (that environment is
        popped anyway), and any ``\\begin`` still open at the end of the
        input. Positions are 0-based offsets of the ``\\begin``/``\\end``
        command.
    """
    errors = []
    env_stack = []  # (environment name, position of its \begin)
    pattern = re.compile(r'\\(begin|end)\s*{([^}]+)}')
    for m in pattern.finditer(latex_str):
        cmd = m.group(1)
        env = m.group(2).strip()
        pos = m.start()
        if cmd == "begin":
            env_stack.append((env, pos))
            continue
        # cmd == "end"
        if not env_stack:
            errors.append(f"位置 {pos}: \\end{{{env}}} 没有对应的 \\begin")
            continue
        last_env, last_pos = env_stack.pop()
        if last_env != env:
            errors.append(
                f"位置 {last_pos} 的 \\begin{{{last_env}}} 与位置 {pos} 的 \\end{{{env}}} 不匹配"
            )
    # Environments that were opened but never closed.
    for env, pos in env_stack:
        errors.append(f"位置 {pos}: \\begin{{{env}}} 没有对应的 \\end")
    return (len(errors) == 0), errors
149+
def run_static_checks(latex_snippet: str) -> list:
    """Run the comment-stripping, brace and environment checks on a snippet.

    Args:
        latex_snippet: LaTeX source to check.

    Returns:
        A list of prefixed error messages: brace errors first, then
        environment errors. Empty when both checks pass.
    """
    stripped = remove_latex_comments(latex_snippet)
    braces_ok, brace_problems = check_balanced_braces(stripped)
    envs_ok, env_problems = check_environment_matching(stripped)

    findings = []
    if not braces_ok:
        findings += ["大括号错误: " + problem for problem in brace_problems]
    if not envs_ok:
        findings += ["环境匹配错误: " + problem for problem in env_problems]
    return findings
160+
def check_with_pdflatex(latex_snippet: str) -> list:
    """Compile *latex_snippet* with pdflatex and collect the reported errors.

    The snippet is placed in the body of a minimal ``article`` document that
    loads ``amsmath``, written to a temporary directory, and compiled with
    ``pdflatex -interaction=nonstopmode`` under a 15 second timeout.

    NOTE(review): the snippet is inserted directly into the document body,
    not inside math delimiters -- confirm that callers passing bare formula
    bodies intend this.

    Args:
        latex_snippet: LaTeX source to compile.

    Returns:
        A list of error strings: every output line starting with ``!``, or a
        generic message when pdflatex exits non-zero without such a line, or
        a single message describing why pdflatex could not be run (missing
        executable, timeout, ...). Empty when compilation succeeds.
    """
    template = r"""
\documentclass{article}
\usepackage{amsmath}
\begin{document}
%s
\end{document}
""" % latex_snippet

    errors = []
    with tempfile.TemporaryDirectory() as tmpdirname:
        tex_file = os.path.join(tmpdirname, "temp.tex")
        with open(tex_file, "w", encoding="utf-8") as f:
            f.write(template)
        try:
            proc = subprocess.run(
                ["pdflatex", "-interaction=nonstopmode", tex_file],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=tmpdirname, timeout=15
            )
        except (OSError, subprocess.SubprocessError) as e:
            # pdflatex missing / not executable, or the run timed out.
            errors.append(f"调用 pdflatex 编译时出错: {e}")
        else:
            # The log is not guaranteed to be valid UTF-8; replace undecodable
            # bytes instead of failing and misreporting that as a LaTeX error.
            output = (
                proc.stdout.decode("utf-8", errors="replace")
                + proc.stderr.decode("utf-8", errors="replace")
            )
            # TeX prefixes error messages with "!".
            errors.extend(
                line.strip() for line in output.splitlines() if line.startswith("!")
            )
            if proc.returncode != 0 and not errors:
                errors.append("pdflatex 返回非 0 错误码,编译可能存在问题。")
    return errors
193+
def extract_latex_segments(markdown_text: str) -> list:
    """Extract the bodies of LaTeX math segments from Markdown text.

    Markdown code (fenced blocks and inline code spans) is blanked out first
    so that dollar signs inside code, e.g. shell variables, are not mistaken
    for math delimiters. Dollar signs escaped with a backslash are not
    treated as delimiters either.

    Args:
        markdown_text: The Markdown document to scan.

    Returns:
        A list of segment bodies without their delimiters: all ``$$...$$``
        display segments first (in document order), then all single-line
        ``$...$`` inline segments (in document order).
    """
    # Fenced code (``` or ~~~) and single-backtick code spans never hold math.
    code_pattern = re.compile(r'```[\s\S]*?```|~~~[\s\S]*?~~~|`[^`\n]*`')
    # Substitute a space so text on either side of the code does not fuse.
    text = code_pattern.sub(" ", markdown_text)

    segments = []
    block_pattern = re.compile(r'(?<!\\)\$\$([\s\S]+?)\$\$')
    segments.extend(block_pattern.findall(text))
    # Opening "$" must not follow "\" or "$"; the body is made of escaped
    # characters (backslash + any char) or anything except "$", "\" and
    # newline; the closing "$" must not be followed by another "$".
    inline_pattern = re.compile(r'(?<![\\$])\$((?:\\.|[^$\\\n])+?)\$(?!\$)')
    segments.extend(inline_pattern.findall(text))
    return segments
204+
def latex_errors(markdown_text: str) -> dict:
    """Check every LaTeX segment of *markdown_text* and report the failures.

    Each extracted segment is stripped and run through the static checks and
    a pdflatex compilation. Only segments for which at least one check
    reported an error are included, so an empty (falsy) result means that no
    errors were found.

    NOTE(review): segments are passed to pdflatex without their ``$``
    delimiters -- confirm the compile check handles bare formula bodies as
    intended.

    Args:
        markdown_text: The Markdown document to check.

    Returns:
        A dict keyed by ``"公式段 N"`` (N is the 1-based index of the segment
        among all extracted segments) whose values hold the segment text,
        the static-check errors and the pdflatex errors.
    """
    report = {}
    for number, seg in enumerate(extract_latex_segments(markdown_text), start=1):
        seg = seg.strip()
        static_errors = run_static_checks(seg)
        pdflatex_errors = check_with_pdflatex(seg)
        if not static_errors and not pdflatex_errors:
            # Clean segment: leave it out so the report's truthiness is meaningful.
            continue
        report[f"公式段 {number}"] = {
            "原始内容": seg,
            "静态检测错误": static_errors,
            "pdflatex 检测错误": pdflatex_errors
        }
    return report
218+
def modify_latex(markdown_text: str, error):
    """Ask the reasoning model to repair the LaTeX in *markdown_text*.

    Args:
        markdown_text: The Markdown document whose LaTeX should be fixed; it
            is embedded between ``<原文>`` tags in the user prompt.
        error: The error report; its string form is embedded between
            ``<报错>`` tags in the user prompt.

    Returns:
        Whatever ``generate`` returns for the two-message conversation.
    """
    # ``deepseek`` is only read here, so no ``global`` declaration is needed.
    return generate([
        {"role": "system", "content": "你是LaTeX校验员。以下是一段Markdown文本,其中的LaTeX代码有错误,请基于报错修正。同时文本要遵循以下中文排版规范:使用全角中文标点;专有名词大小写正确;英文、数字使用半角字符。直接在输出中输出文本内容。"},
        {"role": "user", "content": f"<原文>\n{markdown_text}\n</原文>\n\n<报错>\n{error}\n</报错>"}
    ], deepseek, "deepseek-reasoner")
225+
226+ is_latin = lambda ch : '\u0000 ' <= ch <= '\u007F ' or '\u00A0 ' <= ch <= '\u024F '
227+ is_nonspace_latin = lambda ch : is_latin (ch ) and not ch .isspace () and not ch in """*()[]{}"'/-@#"""
228+ is_nonpunct_cjk = lambda ch : not is_latin (ch ) and ch not in "·!¥…()—【】、;:‘’“”,。《》?「」"
229+
230+ def beautify_string (text ):
231+ res = ""
232+ for idx in range (len (text )):
233+ if idx and (
234+ (is_nonspace_latin (text [idx ]) and is_nonpunct_cjk (text [idx - 1 ])) or
235+ (is_nonspace_latin (text [idx - 1 ]) and is_nonpunct_cjk (text [idx ]))
236+ ): res += " "
237+ res += text [idx ]
238+ return res
239+
# Generation pipeline: topic -> outline -> article -> LaTeX repair ->
# spacing clean-up -> summary. Each stage prints its elapsed time.
start = time.time()
print(" Generating topic:")
topic = beautify_string(extract_topic(topics_text))
print(f" Determined topic: {topic}; time spent {time.time() - start:.1f}s")

start = time.time()
print(" Generating outline:")
outline_result = beautify_string(outline(topic))
print(f" Determined outline: time spent {time.time() - start:.1f}s")

start = time.time()
print(" Generating article:")
article = write_from_outline(outline_result)
print(f" Article written: time spent {time.time() - start:.1f}s")

# Run the LaTeX checks once (each formula is compiled with pdflatex) and
# reuse the report. Only request a repair when some segment actually has an
# error list that is non-empty; the report may also contain clean segments.
latex_report = latex_errors(article)
if any(
    entry.get("静态检测错误") or entry.get("pdflatex 检测错误")
    for entry in latex_report.values()
):
    print(" latex_errors exist")
    start = time.time()
    article = modify_latex(article, latex_report)
    print(f" LaTeX errors fixed: time spent {time.time() - start:.1f}s")

start = time.time()
article = beautify_string(article)
print(f" Article beautified: time spent {time.time() - start:.1f}s")


start = time.time()
print(" Generating summary:")
summary_result = beautify_string(summary(article))
print(f" Decided Summary: {summary_result}; time spent {time.time() - start:.1f}s")
117270
118271lines = iter (article .split ("\n " ))
@@ -146,4 +299,4 @@ def summary(article):
# Write the composed Markdown document to index.md inside the post directory
# (UTF-8), then report where it was written.
with open(f"{path_to}/index.md", "w", encoding="utf-8") as f:
    f.write(markdown_file)

print(f" Composed article: {path_to}/index.md")
0 commit comments