11"""Module for processing XML validation in DocBuild."""
22
33import asyncio
4+ import logging
5+ import subprocess
6+ import tempfile
47from collections .abc import Iterator
58from datetime import date
6- import logging
79from pathlib import Path
8- import tempfile
910
1011from lxml import etree
1112from rich .console import Console
1516from ...constants import XMLDATADIR
1617from ...utils .decorators import RegistryDecorator
1718from ...utils .paths import calc_max_len
19+ from ..commands import run_command
1820from ..context import DocBuildContext
1921
2022# Cast to help with type checking
@@ -76,37 +78,12 @@ def display_results(
7678 console_err .print (f' { message } ' )
7779
7880
79- async def run_command (
80- * args : str , env : dict [str , str ] | None = None
81- ) -> tuple [int , str , str ]:
82- """Run an external command and capture its output.
83-
84- :param args: The command and its arguments separated as tuple elements.
85- :param env: A dictionary of environment variables for the new process.
86- :return: A tuple of (returncode, stdout, stderr).
87- :raises FileNotFoundError: if the command is not found.
88- """
89- process = await asyncio .create_subprocess_exec (
90- * args ,
91- stdout = asyncio .subprocess .PIPE ,
92- stderr = asyncio .subprocess .PIPE ,
93- env = env ,
94- )
95- stdout , stderr = await process .communicate ()
96-
97- # After .communicate() returns, the process has terminated and the
98- # returncode is guaranteed to be set to an integer.
99- assert process .returncode is not None
100-
101- return process .returncode , stdout .decode (), stderr .decode ()
102-
103-
10481async def validate_rng (
10582 xmlfile : Path ,
10683 rng_schema_path : Path = PRODUCT_CONFIG_SCHEMA ,
10784 * ,
10885 xinclude : bool = True ,
109- ) -> tuple [ bool , str ] :
86+ ) -> subprocess . CompletedProcess :
11087 """Validate an XML file against a RELAX NG schema using jing.
11188
11289 If `xinclude` is True (the default), this function resolves XIncludes by
@@ -117,7 +94,7 @@ async def validate_rng(
11794 :param rng_schema_path: The path to the RELAX NG schema file. It supports
11895 both RNC and RNG formats.
11996 :param xinclude: If True, resolve XIncludes with `xmllint` before validation.
120- :return: A tuple containing a boolean success status and any output message .
97+ :return: A subprocess.CompletedProcess containing args, returncode, stdout, stderr .
12198 """
12299 jing_cmd = ['jing' ]
123100 if rng_schema_path .suffix == '.rnc' :
@@ -126,9 +103,6 @@ async def validate_rng(
126103
127104 try :
128105 if xinclude :
129- # Use a temporary file to store the output of xmllint.
130- # This is more robust than piping, especially if jing doesn't
131- # correctly handle stdin (the command "jing schema.rng -" does NOT work.)
132106 with tempfile .NamedTemporaryFile (
133107 prefix = 'jing-validation' ,
134108 suffix = '.xml' ,
@@ -137,34 +111,56 @@ async def validate_rng(
137111 encoding = 'utf-8' ,
138112 ) as tmp_file :
139113 tmp_filepath = Path (tmp_file .name )
140-
141114 # 1. Run xmllint to resolve XIncludes and save to temp file
142- returncode , _ , stderr = await run_command (
143- 'xmllint' , '--xinclude' , '--output' , str (tmp_filepath ), str (xmlfile )
115+ process = await run_command (
116+ 'xmllint' ,
117+ '--xinclude' ,
118+ '--output' ,
119+ str (tmp_filepath ),
120+ str (xmlfile ),
121+ stdout = asyncio .subprocess .PIPE ,
122+ stderr = asyncio .subprocess .PIPE ,
144123 )
145- if returncode != 0 :
146- return False , f'xmllint failed: { stderr .strip ()} '
147-
124+ if process .returncode != 0 :
125+ return subprocess .CompletedProcess (
126+ args = ['xmllint' ],
127+ returncode = process .returncode ,
128+ stdout = process .stdout ,
129+ stderr = process .stderr ,
130+ )
148131 # 2. Run jing on the resolved temporary file
149132 jing_cmd .append (str (tmp_filepath ))
150- returncode , stdout , stderr = await run_command (* jing_cmd )
151- if returncode != 0 :
152- return False , (stdout + stderr ).strip ()
153-
154- return True , ''
133+ process = await run_command (
134+ * jing_cmd ,
135+ stdout = asyncio .subprocess .PIPE ,
136+ stderr = asyncio .subprocess .PIPE ,
137+ )
138+ return subprocess .CompletedProcess (
139+ args = jing_cmd ,
140+ returncode = process .returncode ,
141+ stdout = process .stdout ,
142+ stderr = process .stderr ,
143+ )
155144 else :
156- # Validate directly with jing, no XInclude resolution.
157145 jing_cmd .append (str (xmlfile ))
158- returncode , stdout , stderr = await run_command (* jing_cmd )
159- if returncode == 0 :
160- return True , ''
161- return False , (stdout + stderr ).strip ()
162-
146+ process = await run_command (
147+ * jing_cmd ,
148+ stdout = asyncio .subprocess .PIPE ,
149+ stderr = asyncio .subprocess .PIPE ,
150+ )
151+ return subprocess .CompletedProcess (
152+ args = jing_cmd ,
153+ returncode = process .returncode ,
154+ stdout = process .stdout ,
155+ stderr = process .stderr ,
156+ )
163157 except FileNotFoundError as e :
164158 tool = e .filename or 'xmllint/jing'
165- return (
166- False ,
167- f'{ tool } command not found. Please install it to run validation.' ,
159+ return subprocess .CompletedProcess (
160+ args = [tool ],
161+ returncode = 127 ,
162+ stdout = '' ,
163+ stderr = f'{ tool } command not found. Please install it to run validation.' ,
168164 )
169165
170166
@@ -206,40 +202,31 @@ async def process_file(
206202 '/' .join (path_obj .parts [- 2 :]) if len (path_obj .parts ) >= 2 else str (filepath )
207203 )
208204
209- # IDEA: Should we replace jing and validate with etree.RelaxNG?
210-
211205 # 1. RNG Validation
212- rng_success , rng_output = await validate_rng (path_obj )
213- if not rng_success :
206+ rng_result = await validate_rng (path_obj )
207+ if rng_result . returncode != 0 :
214208 console_err .print (
215209 f'{ shortname :<{max_len }} : RNG validation => [red]failed[/red]'
216210 )
217- if rng_output :
218- console_err .print (f' [bold red]Error:[/] { rng_output } ' )
211+ if rng_result . stderr :
212+ console_err .print (f' [bold red]Error:[/] { rng_result . stderr . strip () } ' )
219213 return 10 # Specific error code for RNG failure
220214
221215 # 2. Python-based checks
222216 try :
223217 tree = await asyncio .to_thread (etree .parse , str (filepath ), parser = None )
224-
225218 except etree .XMLSyntaxError as err :
226- # This can happen if xmllint passes but lxml's parser is stricter.
227219 console_err .print (
228220 f'{ shortname :<{max_len }} : XML Syntax Error => [red]failed[/red]'
229221 )
230222 console_err .print (f' [bold red]Error:[/] { err } ' )
231223 return 20
232-
233224 except Exception as err :
234225 console_err .print (f' [bold red]Error:[/] { err } ' )
235226 return 200
236227
237- # Run all checks for this file
238228 check_results = await run_python_checks (tree )
239-
240- # Display results based on verbosity level
241229 display_results (shortname , check_results , context .verbose , max_len )
242-
243230 return 0 if all (result .success for _ , result in check_results ) else 1
244231
245232
@@ -284,7 +271,9 @@ async def process(
284271
285272 # Filter for files that passed the initial validation
286273 successful_files_paths = [
287- xmlfile for xmlfile , result in zip (xmlfiles , results , strict = False ) if result == 0
274+ xmlfile
275+ for xmlfile , result in zip (xmlfiles , results , strict = False )
276+ if result == 0
288277 ]
289278
290279 # After validating individual files, perform a stitch validation to
0 commit comments