Skip to content

Commit 2fcb49a

Browse files
author
Joshua Rogers
committed
log the failed-to-parse line and data if possible
1 parent b4e33ed commit 2fcb49a

File tree

1 file changed

+68
-2
lines changed

1 file changed

+68
-2
lines changed

gixy/parser/raw_parser.py

Lines changed: 68 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88

99
from pyparsing import (
1010
Literal, Suppress, White, Word, alphanums, Forward, Group, Optional, Combine,
11-
Keyword, OneOrMore, ZeroOrMore, Regex, QuotedString, nestedExpr, ParseResults)
11+
Keyword, OneOrMore, ZeroOrMore, Regex, QuotedString, nestedExpr, ParseResults,
12+
lineno, col, ParseException)
1213

1314
LOG = logging.getLogger(__name__)
1415

@@ -102,7 +103,7 @@ def script(self):
102103

103104
comment = (
104105
Regex(r"#.*")
105-
)("comment").setParseAction(_fix_comment)
106+
)("comment")
106107

107108
hash_value = Group(
108109
value +
@@ -174,6 +175,17 @@ def script(self):
174175
nestedExpr(opener="{", closer="}")
175176
)("unparsed_block")
176177

178+
if_block.setParseAction(attach_line_number)
179+
location_block.setParseAction(attach_line_number)
180+
hash_block.setParseAction(attach_line_number)
181+
generic_block.setParseAction(attach_line_number)
182+
directive.setParseAction(attach_line_number)
183+
include.setParseAction(attach_line_number)
184+
file_delimiter.setParseAction(attach_line_number)
185+
hash_value.setParseAction(attach_line_number)
186+
unparsed_block.setParseAction(attach_line_number, detect_problematic_line)
187+
comment.setParseAction(attach_line_number, _fix_comment)
188+
177189
return sub_block
178190

179191

@@ -189,3 +201,57 @@ def _fix_comment(string, location, tokens):
189201

190202
comment = tokens[0][1:].strip()
191203
return [comment]
204+
205+
def attach_line_number(s, loc, tokens):
    """
    Parse action that records where a match began on the matched tokens.

    Stores the line number under the ``'line'`` key and the column number
    under the ``'col'`` key of ``tokens``, both computed from ``loc``
    within ``s``.

    :param s: the original text being parsed
    :param loc: the location where the match started
    :param tokens: the tokens matched
    :return: the same ``tokens`` object, with ``'line'`` and ``'col'`` set
    """
    # Same order as before: line first, then column.
    for key, locate in (('line', lineno), ('col', col)):
        tokens[key] = locate(loc, s)
    return tokens
216+
217+
def flatten_tokens(tokens_list):
    """
    Render a (possibly nested) token sequence as one space-separated string.

    Every nested group is emitted as ``{``, its flattened contents, ``}``;
    every other token is converted with ``str()``.

    A nested group is any non-string iterable, not only ``list``. Parser
    result containers that are iterable but do not subclass ``list`` are
    therefore recursed into instead of being stringified into their repr.

    :param tokens_list: iterable of tokens; items may themselves be iterables
    :return: the flattened text, with parts joined by single spaces
    """
    parts = []
    for tok in tokens_list:
        # Strings are iterable too, but they are leaf tokens.
        is_text = isinstance(tok, (str, bytes))
        if not is_text and hasattr(tok, '__iter__'):
            parts.append('{')
            parts.append(flatten_tokens(tok))
            parts.append('}')
        else:
            parts.append(str(tok))
    return ' '.join(parts)
227+
228+
def detect_problematic_line(s, loc, tokens):
    """
    Parse action that logs which part of an unparsed block failed to parse.

    The third token (presumably the nested body of the block -- confirm
    against the ``unparsed_block`` grammar) is flattened back to text and
    parsed again with a fresh ``RawParser``. If that raises
    ``ParseException``, a warning is logged with a snippet of up to 50
    characters starting at the reported error position. The tokens are
    always returned unchanged; this function only logs.

    :param s: the original text being parsed
    :param loc: the location where the match started
    :param tokens: the tokens matched
    :return: ``tokens``, unmodified
    """
    if len(tokens) < 3:
        return tokens  # not enough content to reparse

    block_content = flatten_tokens(tokens[2])
    # Line of the enclosing block in the original text, used in the message.
    line_num = lineno(loc, s)
    try:
        # Try to reparse the flattened block
        RawParser().script.parseString(block_content, parseAll=True)
    except ParseException as e:
        block_lines = block_content.splitlines()

        if 0 < e.lineno <= len(block_lines):
            broken_line = block_lines[e.lineno - 1]
            # ParseException.col is 1-based while slice indices are 0-based;
            # without the -1 the first offending character is dropped.
            start_pos = max(e.col - 1, 0)
            # The flattened text may contain "', '" separators left over from
            # stringified nested results; collapse them to spaces.
            snippet = broken_line[start_pos:start_pos + 50].replace("', '", " ")

            LOG.warning(
                "Detected unparsable content inside block beginning at line %d: '%s'.",
                line_num, snippet
            )
        else:
            LOG.warning(
                "Detected unparsable content inside block at unknown position."
            )
    return tokens

0 commit comments

Comments
 (0)