Skip to content

Commit 35675ab

Browse files
committed
.
1 parent 91e708a commit 35675ab

File tree

1 file changed

+140
-1
lines changed

1 file changed

+140
-1
lines changed

writer.py

Lines changed: 140 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,145 @@ def summary(article):
9898
{"role": "user", "content": f"给这篇文章写一个15字的简短介绍:\n\n{article}"}
9999
], deepseek, "deepseek-chat")
100100

101+
# LaTeX error handling
102+
def remove_latex_comments(latex_str: str) -> str:
    r"""Strip LaTeX ``%`` comments from every line of *latex_str*.

    A ``%`` starts a comment unless it is escaped, i.e. preceded by an odd
    number of backslashes: ``\%`` is a literal percent sign, whereas in
    ``\\%`` the two backslashes form a line break and the ``%`` that follows
    does start a comment.

    Args:
        latex_str: LaTeX source, possibly spanning several lines.

    Returns:
        The source with everything from the first unescaped ``%`` to the end
        of each line removed.  Lines are re-joined with ``"\n"``; a trailing
        newline of the input is not preserved.
    """
    # The non-capturing group swallows complete backslash pairs, and the
    # lookbehind keeps the match from starting in the middle of a backslash
    # run, so the captured ``%`` is only reached after an even number of
    # backslashes (zero included).
    comment_start = re.compile(r'(?<!\\)(?:\\\\)*(%)')
    cleaned_lines = []
    for line in latex_str.splitlines():
        m = comment_start.search(line)
        if m:
            line = line[:m.start(1)]
        cleaned_lines.append(line)
    return "\n".join(cleaned_lines)
111+
112+
def check_balanced_braces(latex_str: str) -> "tuple[bool, list[str]]":
    r"""Check that every grouping brace in *latex_str* has a partner.

    Only unescaped ``{`` and ``}`` are counted.  A brace that directly
    follows a backslash (``\{`` / ``\}``) is a literal character in LaTeX and
    is ignored, so constructs such as ``\left\{ ... \right.`` are not
    reported as unbalanced.  A double backslash ``\\`` is consumed as one
    escape, so a brace right after it is still counted.

    Args:
        latex_str: LaTeX source to inspect.

    Returns:
        A ``(ok, errors)`` pair.  ``ok`` is True when no problem was found;
        ``errors`` holds one message per unmatched brace, each carrying the
        character offset of that brace in *latex_str*.
    """
    open_positions = []  # offsets of '{' still waiting for their '}'
    errors = []
    escaped = False  # True when the previous character was an unescaped '\'
    for index, char in enumerate(latex_str):
        if escaped:
            # This character belongs to a control symbol such as \{ or \\.
            escaped = False
            continue
        if char == '\\':
            escaped = True
        elif char == '{':
            open_positions.append(index)
        elif char == '}':
            if open_positions:
                open_positions.pop()
            else:
                errors.append(f"位置 {index}: 右大括号 '}}' 没有对应的左大括号")
    for pos in open_positions:
        errors.append(f"位置 {pos}: 左大括号 '{{' 没有对应的右大括号")
    return (not errors), errors
127+
128+
def check_environment_matching(latex_str: str) -> (bool, list):
    r"""Verify that ``\begin{...}`` and ``\end{...}`` commands pair up.

    The commands are scanned left to right.  Each ``\end`` is compared with
    the most recently opened ``\begin`` that is still unclosed; that
    ``\begin`` is consumed even when the environment names differ.

    Returns a ``(ok, errors)`` pair: ``ok`` is True when nothing was
    reported, and ``errors`` lists one message per stray ``\end``, mismatched
    pair, or ``\begin`` left open, each with the offset of the command.
    """
    problems = []
    open_envs = []  # (name, offset) of each \begin not yet closed
    for match in re.finditer(r'\\(begin|end)\s*{([^}]+)}', latex_str):
        kind, raw_name = match.groups()
        name = raw_name.strip()
        offset = match.start()
        if kind == "begin":
            open_envs.append((name, offset))
            continue
        # From here on the command is an \end.
        if not open_envs:
            problems.append(f"位置 {offset}: \\end{{{name}}} 没有对应的 \\begin")
            continue
        opened_name, opened_at = open_envs.pop()
        if opened_name != name:
            problems.append(f"位置 {opened_at}\\begin{{{opened_name}}} 与位置 {offset}\\end{{{name}}} 不匹配")
    # Whatever is still on the stack was never closed.
    problems.extend(
        f"位置 {opened_at}: \\begin{{{opened_name}}} 没有对应的 \\end"
        for opened_name, opened_at in open_envs
    )
    return (not problems), problems
149+
150+
def run_static_checks(latex_snippet: str) -> list:
    """Run the compiler-free checks on *latex_snippet*.

    Comments are stripped first, then the brace check and the environment
    check are run on the stripped text.  Returns a flat list of messages,
    brace problems first, each prefixed with the name of the check that
    produced it; the list is empty when both checks pass.
    """
    stripped = remove_latex_comments(latex_snippet)
    braces_ok, brace_problems = check_balanced_braces(stripped)
    envs_ok, env_problems = check_environment_matching(stripped)

    findings = []
    if not braces_ok:
        findings += ["大括号错误: " + message for message in brace_problems]
    if not envs_ok:
        findings += ["环境匹配错误: " + message for message in env_problems]
    return findings
160+
161+
def check_with_pdflatex(latex_snippet: str) -> list:
    """
    Compile *latex_snippet* with pdflatex and return the errors it reports.

    The snippet is inserted as-is into the body of a minimal ``article``
    document that loads ``amsmath``, written to a temporary directory and
    compiled with ``pdflatex -interaction=nonstopmode`` (15 second timeout).
    Because the snippet lands directly in the document body, math content
    must carry its own delimiters or environment to be compiled in math mode.

    Returns:
        The output lines that start with ``!`` (stripped).  If there are none
        but pdflatex exited with a non-zero status, a single generic message
        is returned instead.  If pdflatex cannot be started or times out, the
        list contains one message describing that failure.
    """
    template = r"""
\documentclass{article}
\usepackage{amsmath}
\begin{document}
%s
\end{document}
""" % latex_snippet

    errors = []
    with tempfile.TemporaryDirectory() as tmpdirname:
        tex_file = os.path.join(tmpdirname, "temp.tex")
        with open(tex_file, "w", encoding="utf-8") as f:
            f.write(template)
        try:
            proc = subprocess.run(
                ["pdflatex", "-interaction=nonstopmode", tex_file],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=tmpdirname, timeout=15
            )
        except (OSError, subprocess.SubprocessError) as e:
            # pdflatex is missing / not executable, or the run timed out.
            errors.append(f"调用 pdflatex 编译时出错: {e}")
        else:
            # pdflatex output is not guaranteed to be valid UTF-8.  Decode
            # leniently so a stray byte cannot raise and hide the real
            # compiler errors behind a decoding failure.
            output = (
                proc.stdout.decode("utf-8", errors="replace")
                + proc.stderr.decode("utf-8", errors="replace")
            )
            errors.extend(
                line.strip() for line in output.splitlines() if line.startswith("!")
            )
            if proc.returncode != 0 and not errors:
                errors.append("pdflatex 返回非 0 错误码,编译可能存在问题。")
    return errors
193+
194+
def extract_latex_segments(markdown_text: str) -> list:
    r"""
    Extract the LaTeX math segments embedded in Markdown text.

    Display math (``$$...$$``, may span lines) is collected first, then
    inline math (``$...$``, single line).  The delimiters are not part of the
    returned segments.  A dollar sign escaped with a backslash (``\$``) is
    treated as a literal character: it neither opens nor closes a segment,
    so prices such as ``\$5 and \$10`` are not mistaken for a formula and
    ``\$`` may appear inside an inline formula.

    Returns:
        A list of segment strings: all display segments in order of
        appearance, followed by all inline segments in order of appearance.
    """
    # Display math: the opening $$ must not be escaped.
    block_pattern = re.compile(r'(?<!\\)\$\$([\s\S]+?)\$\$')
    # Inline math: the opening $ is neither escaped nor the second half of a
    # $$, and the closing $ is not the first half of a $$.  The body is built
    # from backslash-escaped pairs or plain characters, which keeps an
    # escaped \$ from terminating the formula.
    inline_pattern = re.compile(r'(?<![\\$])\$((?:\\.|[^$\\\n])+?)\$(?!\$)')

    segments = block_pattern.findall(markdown_text)
    segments.extend(inline_pattern.findall(markdown_text))
    return segments
204+
205+
def latex_errors(markdown_text: str) -> dict:
    """Check every LaTeX segment of *markdown_text* and report the failures.

    Each extracted segment is stripped and run through the static checks and
    through pdflatex.  Only segments for which at least one check reported a
    problem are recorded, so the result is empty (falsy) exactly when no
    error was found and can be used directly as a condition.

    Returns:
        A dict keyed by ``"公式段 N"`` (N is the 1-based position of the
        segment in extraction order), each value holding the segment text and
        the two lists of error messages.
    """
    # NOTE(review): the extracted segments no longer carry their $ / $$
    # delimiters and check_with_pdflatex puts the snippet straight into the
    # document body, so the formula appears to be compiled outside math
    # mode -- confirm whether it should be re-wrapped before compiling.
    report = {}
    for idx, seg in enumerate(extract_latex_segments(markdown_text), start=1):
        seg = seg.strip()
        static_errors = run_static_checks(seg)
        pdflatex_errors = check_with_pdflatex(seg)
        if not static_errors and not pdflatex_errors:
            # A clean segment must not make the report truthy.
            continue
        report[f"公式段 {idx}"] = {
            "原始内容": seg,
            "静态检测错误": static_errors,
            "pdflatex 检测错误": pdflatex_errors
        }
    return report
218+
219+
def modify_latex(markdown_text: str, error):
    """Ask the model to repair the LaTeX in *markdown_text*.

    Builds a two-message conversation: a system prompt that casts the model
    as a LaTeX proofreader and also states Chinese typesetting rules, and a
    user message carrying the original text and *error* inside tagged
    sections.  The conversation is passed to ``generate`` together with the
    module-level ``deepseek`` object and the "deepseek-reasoner" model name,
    and the result of that call is returned unchanged.
    """
    system_message = {
        "role": "system",
        "content": "你是LaTeX校验员。以下是一段Markdown文本,其中的LaTeX代码有错误,请基于报错修正。同时文本要遵循以下中文排版规范:使用全角中文标点;专有名词大小写正确;英文、数字使用半角字符。直接在输出中输出文本内容。",
    }
    user_message = {
        "role": "user",
        "content": f"<原文>\n{markdown_text}\n</原文>\n\n<报错>\n{error}\n</报错>",
    }
    return generate([system_message, user_message], deepseek, "deepseek-reasoner")
225+
226+
def is_latin(ch):
    """Return True if *ch* lies in U+0000-U+007F or U+00A0-U+024F."""
    return '\u0000' <= ch <= '\u007F' or '\u00A0' <= ch <= '\u024F'


def is_nonspace_latin(ch):
    """Return True for a Latin character that should be spaced from CJK text.

    Whitespace and the listed ASCII bracket / quote / symbol characters are
    excluded.
    """
    if not is_latin(ch) or ch.isspace():
        return False
    return ch not in """*()[]{}"'/-@#"""


def is_nonpunct_cjk(ch):
    """Return True if *ch* is neither Latin nor one of the listed full-width
    punctuation marks."""
    if is_latin(ch):
        return False
    return ch not in "·!¥…()—【】、;:‘’“”,。《》?「」"
229+
230+
def beautify_string(text):
    """Return *text* with a space inserted at Latin/CJK boundaries.

    For every pair of neighbouring characters, a single space is placed
    between them when one satisfies ``is_nonspace_latin`` and the other
    satisfies ``is_nonpunct_cjk``, in either order.  All original characters
    are kept, in order.
    """
    pieces = []
    previous = None
    for position, current in enumerate(text):
        # The first character has no left neighbour to compare against.
        if position and (
            (is_nonspace_latin(current) and is_nonpunct_cjk(previous))
            or (is_nonspace_latin(previous) and is_nonpunct_cjk(current))
        ):
            pieces.append(" ")
        pieces.append(current)
        previous = current
    return "".join(pieces)
239+
101240
start = time.time()
102241
print(" Generating topic:")
103242
topic = beautify_string(extract_topic(topics_text))
@@ -110,7 +249,7 @@ def summary(article):
110249

111250
start = time.time()
112251
print(" Generating article:")
113-
article = beautify_string(write_from_outline(outline_result))
252+
article = write_from_outline(outline_result)
114253
print(f" Article written: time spent {time.time() - start:.1f} s")
115254

116255
if latex_errors(article):

0 commit comments

Comments
 (0)