Skip to content

Commit 7ca56e0

Browse files
committed
Merge pull request #23 from azoner/bug-gh21
Handle CR/LF characters in element values
2 parents 9034c4f + 26dc28c commit 7ca56e0

File tree

7 files changed: 98 additions and 51 deletions

pyx12/map_if.py

Lines changed: 30 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -782,36 +782,36 @@ def is_match(self, seg):
782782
"""
783783
if seg.get_seg_id() == self.id:
784784
if self.children[0].is_element() \
785-
and self.children[0].get_data_type() == 'ID' \
786-
and self.children[0].usage == 'R' \
787-
and len(self.children[0].valid_codes) > 0 \
788-
and seg.get_value('01') not in self.children[0].valid_codes:
785+
and self.children[0].get_data_type() == 'ID' \
786+
and self.children[0].usage == 'R' \
787+
and len(self.children[0].valid_codes) > 0 \
788+
and seg.get_value('01') not in self.children[0].valid_codes:
789789
#logger.debug('is_match: %s %s' % (seg.get_seg_id(), seg[1]), self.children[0].valid_codes)
790790
return False
791791
# Special Case for 820
792792
elif seg.get_seg_id() == 'ENT' \
793-
and self.children[1].is_element() \
794-
and self.children[1].get_data_type() == 'ID' \
795-
and len(self.children[1].valid_codes) > 0 \
796-
and seg.get_value('02') not in self.children[1].valid_codes:
793+
and self.children[1].is_element() \
794+
and self.children[1].get_data_type() == 'ID' \
795+
and len(self.children[1].valid_codes) > 0 \
796+
and seg.get_value('02') not in self.children[1].valid_codes:
797797
#logger.debug('is_match: %s %s' % (seg.get_seg_id(), seg[1]), self.children[0].valid_codes)
798798
return False
799799
# Special Case for 999 CTX
800800
# IG defines the dataelement 2100/CT01-1 as an AN, but acts like an ID
801801
elif seg.get_seg_id() == 'CTX' \
802-
and self.children[0].is_composite() \
803-
and self.children[0].children[0].get_data_type() == 'AN' \
804-
and len(self.children[0].children[0].valid_codes) > 0 \
805-
and seg.get_value('01-1') not in self.children[0].children[0].valid_codes:
802+
and self.children[0].is_composite() \
803+
and self.children[0].children[0].get_data_type() == 'AN' \
804+
and len(self.children[0].children[0].valid_codes) > 0 \
805+
and seg.get_value('01-1') not in self.children[0].children[0].valid_codes:
806806
return False
807807
elif self.children[0].is_composite() \
808-
and self.children[0].children[0].get_data_type() == 'ID' \
809-
and len(self.children[0].children[0].valid_codes) > 0 \
810-
and seg.get_value('01-1') not in self.children[0].children[0].valid_codes:
808+
and self.children[0].children[0].get_data_type() == 'ID' \
809+
and len(self.children[0].children[0].valid_codes) > 0 \
810+
and seg.get_value('01-1') not in self.children[0].children[0].valid_codes:
811811
return False
812812
elif seg.get_seg_id() == 'HL' and self.children[2].is_element() \
813-
and len(self.children[2].valid_codes) > 0 \
814-
and seg.get_value('03') not in self.children[2].valid_codes:
813+
and len(self.children[2].valid_codes) > 0 \
814+
and seg.get_value('03') not in self.children[2].valid_codes:
815815
return False
816816
else:
817817
return True
@@ -1071,8 +1071,7 @@ def __init__(self, root, parent, elem):
10711071
if self.res is not None and self.res != '':
10721072
self.rec = re.compile(self.res, re.S)
10731073
except Exception:
1074-
raise EngineError('Element regex "%s" failed to compile' %
1075-
(self.res))
1074+
raise EngineError('Element regex "%s" failed to compile' % (self.res))
10761075

10771076
v = elem.find('valid_codes')
10781077
if v is not None:
@@ -1161,7 +1160,6 @@ def is_valid(self, elem, errh, type_list=[]):
11611160
(self.name, self.refdes)
11621161
self._error(errh, err_str, '6', elem.__repr__())
11631162
return False
1164-
11651163
if elem is None or elem.get_value() == '':
11661164
if self.usage in ('N', 'S'):
11671165
return True
@@ -1212,6 +1210,12 @@ def is_valid(self, elem, errh, type_list=[]):
12121210
self._error(errh, err_str, '5', elem_val)
12131211
valid = False
12141212

1213+
(res, bad_string) = validation.contains_control_character(elem_val)
1214+
if res:
1215+
err_str = 'Data element "%s" (%s), contains an invalid control character(%s)' % \
1216+
(self.name, self.refdes, bad_string)
1217+
self._error(errh, err_str, '6', bad_string)
1218+
valid = False
12151219
if data_type in ['AN', 'ID'] and elem_val[-1] == ' ':
12161220
if len(elem_val.rstrip()) >= min_len:
12171221
err_str = 'Element "%s" (%s) has unnecessary trailing spaces. (%s)' % \
@@ -1240,8 +1244,7 @@ def is_valid(self, elem, errh, type_list=[]):
12401244
if len(type_list) > 0:
12411245
valid_type = False
12421246
for dtype in type_list:
1243-
valid_type |= validation.IsValidDataType(elem_val,
1244-
dtype, self.root.param.get('charset'))
1247+
valid_type |= validation.IsValidDataType(elem_val, dtype, self.root.param.get('charset'))
12451248
if not valid_type:
12461249
if 'TM' in type_list:
12471250
err_str = 'Data element "%s" (%s) contains an invalid time (%s)' % \
@@ -1273,7 +1276,7 @@ def _is_valid_code(self, elem_val, errh):
12731276
if elem_val in self.valid_codes:
12741277
bValidCode = True
12751278
if self.external_codes is not None and \
1276-
self.root.ext_codes.isValid(self.external_codes, elem_val):
1279+
self.root.ext_codes.isValid(self.external_codes, elem_val):
12771280
bValidCode = True
12781281
if not bValidCode:
12791282
err_str = '(%s) is not a valid code for %s (%s)' % (
@@ -1341,7 +1344,9 @@ def _error(self, errh, err_str, err_cde, elem_val):
13411344
"""
13421345
Forward the error to an error_handler
13431346
"""
1344-
errh.ele_error(err_cde, err_str, elem_val, self.refdes)
1347+
err_str2 = err_str.replace('\n', '').replace('\r', '')
1348+
elem_val2 = elem_val.replace('\n', '').replace('\r', '')
1349+
errh.ele_error(err_cde, err_str2, elem_val2, self.refdes)
13451350
#, pos=self.seq, data_ele=self.data_ele)
13461351

13471352
def debug_print(self):
@@ -1408,8 +1413,7 @@ def is_valid(self, comp_data, errh):
14081413
valid = False
14091414
for i in range(min(len(comp_data), self.get_child_count())):
14101415
valid &= self.get_child_node_by_idx(i).is_valid(comp_data[i], errh)
1411-
for i in range(min(len(comp_data), self.get_child_count()),
1412-
self.get_child_count()):
1416+
for i in range(min(len(comp_data), self.get_child_count()), self.get_child_count()):
14131417
if i < self.get_child_count():
14141418
#Check missing required elements
14151419
valid &= self.get_child_node_by_idx(i).is_valid(None, errh)

pyx12/rawx12file.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ def __init__(self, fin):
5959
def __iter__(self):
6060
"""
6161
Iterate over input lines
62+
Often, X12 files have a CR-LF after the segment delimiter.
63+
Split the input stream on the delimiter and remove any leading CR-LF
6264
"""
6365
while True:
6466
if self.buffer.find(self.seg_term) == -1:
@@ -69,7 +71,7 @@ def __iter__(self):
6971
break
7072
# Get first segment in buffer
7173
(line, self.buffer) = self.buffer.split(self.seg_term, 1)
72-
line = line.replace('\n', '').replace('\r', '')
74+
line = line.lstrip('\n\r')
7375
if line == '':
7476
break
7577
yield(line)

pyx12/segment.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -484,9 +484,7 @@ def format(self, seg_term=None, ele_term=None, subele_term=None):
484484
break
485485
for ele in self.elements[:i + 1]:
486486
str_elems.append(ele.format(subele_term))
487-
return '%s%s%s%s' % (self.seg_id, ele_term,
488-
ele_term.join(str_elems),
489-
seg_term)
487+
return '%s%s%s%s' % (self.seg_id, ele_term, ele_term.join(str_elems), seg_term)
490488

491489
def format_ele_list(self, str_elems, subele_term=None):
492490
"""

pyx12/test/test_x12n_document.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,3 +152,6 @@ class Test5010(X12DocumentTestCase):
152152

153153
def test_834_lui_id_5010(self):
154154
self._test_999('834_lui_id_5010')
155+
156+
def test_834_eol_in_element(self):
157+
self._test_999('834_eol_in_element')

pyx12/test/x12testdata.py

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2171,11 +2171,52 @@
21712171
SE*6*0001~
21722172
GE*1*13360001~
21732173
IEA*1*703201721~
2174+
"""},
2175+
'834_eol_in_element': {
2176+
'source': """ISA*00* *00* *ZZ*D00XXX *ZZ*00AA *070305*1832*U*00501*000701336*0*P*:~
2177+
GS*BE*D00XXX*00AA*20070305*1832*13360001*X*005010X220A1~
2178+
ST*834*0001*005010X220A1~
2179+
BGN*00*88880070301 00*20070305*181245****4~
2180+
DTP*007*D8*20070301~
2181+
N1*P5*PAYER 1*FI*999999999~
2182+
N1*IN*KCMHSAS*FI*999999999~
2183+
INS*Y*18*030*XN*A*C**FT~
2184+
REF*0F*00389999~
2185+
REF*1L*000003409999~
2186+
REF*3H*K129999A~
2187+
DTP*356*D8*20070301~
2188+
NM1*IL*1*DOE*JOHN*A***34*999999999~
2189+
N3*777 ELM ST
2190+
APT 55~
2191+
N4*ALLEGAN*MI*49010**CY*03~
2192+
DMG*D8*19670330*M**O~
2193+
LUI***ESSPANISH~
2194+
HD*030**AK*064703*IND~
2195+
DTP*348*D8*20070301~
2196+
AMT*P3*45.34~
2197+
REF*17*E 1F~
2198+
SE*20*0001~
2199+
GE*1*13360001~
2200+
IEA*1*000701336~
2201+
""",
2202+
'resAck': """ISA*00* *00* *ZZ*00AA *ZZ*D00XXX *131107*1503*^*00501*311071503*0*P*:~
2203+
GS*FA*00AA*D00XXX*20131107*150355*608852007*X*005010X231~
2204+
ST*999*0001*005010X231~
2205+
AK1*BE*13360001*005010X220A1~
2206+
AK2*834*0001*005010X220A1~
2207+
IK3*N3*12**8~
2208+
IK4*1*166*6*<LF>~
2209+
IK5*R*5~
2210+
AK9*R*1*1*0~
2211+
SE*8*0001~
2212+
GE*1*608852007~
2213+
IEA*1*311071503~
21742214
"""},
21752215
}
21762216

21772217
if __name__ == '__main__':
2218+
import os.path
21782219
for k in datafiles:
21792220
if 'source' in datafiles[k]:
2180-
with open(k + '.txt', 'w') as f:
2221+
with open(os.path.join('files', k + '.txt'), 'w') as f:
21812222
f.write(datafiles[k]['source'])

pyx12/validation.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ def IsValidDataType(str_val, data_type, charset='B', icvn='00401'):
2828
@param charset: [optional] - 'B' for Basic X12 character set, 'E' for extended
2929
@type charset: string
3030
@rtype: boolean
31+
@todo: need to generalize control character validation
3132
"""
3233
if not data_type:
3334
return True
@@ -204,3 +205,11 @@ def is_valid_time(val):
204205
except IsValidError:
205206
return False
206207
return True
208+
209+
210+
def contains_control_character(str_val, charset='B', icvn='00401'):
    """
    Check a data element value for an embedded line-break control character.

    Only line feed and carriage return are detected.  Line feed is tested
    first, so a value containing both characters is reported as '<LF>'.

    @param str_val: data element value to inspect
    @type str_val: string
    @param charset: [optional] - currently not used by this check
    @type charset: string
    @param icvn: [optional] - currently not used by this check
    @type icvn: string
    @return: (True, '<LF>') or (True, '<CR>') when such a character is
        present, otherwise (False, None)
    @rtype: tuple(boolean, string)
    """
    if not str_val:
        # None or empty value: nothing to scan.  A bare `in` test against
        # None would raise TypeError instead of answering the question.
        return (False, None)
    # Order matters: LF is reported in preference to CR.
    for control_char, label in (('\n', '<LF>'), ('\r', '<CR>')):
        if control_char in str_val:
            return (True, label)
    return (False, None)

pyx12/x12file.py

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -74,22 +74,19 @@ def _parse_segment(self, seg_data):
7474
"""
7575
if seg_data.is_empty():
7676
err_str = 'Segment "%s" is empty' % (seg_data)
77-
self._seg_error('8', err_str, None,
78-
src_line=self.cur_line + 1)
77+
self._seg_error('8', err_str, None, src_line=self.cur_line + 1)
7978
if not seg_data.is_seg_id_valid():
8079
err_str = 'Segment identifier "%s" is invalid' % (
8180
seg_data.get_seg_id())
82-
self._seg_error('1', err_str, None,
83-
src_line=self.cur_line + 1)
81+
self._seg_error('1', err_str, None, src_line=self.cur_line + 1)
8482
seg_id = seg_data.get_seg_id()
8583
if seg_id == 'ISA':
8684
if len(seg_data) != 16:
8785
raise pyx12.errors.X12Error('The ISA segment must have 16 elements (%s)' % (seg_data))
8886
interchange_control_number = seg_data.get_value('ISA13')
8987
if interchange_control_number in self.isa_ids:
9088
err_str = 'ISA Interchange Control Number '
91-
err_str += '%s not unique within file' \
92-
% (interchange_control_number)
89+
err_str += '%s not unique within file' % (interchange_control_number)
9390
self._isa_error('025', err_str)
9491
self.loops.append(('ISA', interchange_control_number))
9592
self.isa_ids.append(interchange_control_number)
@@ -100,8 +97,7 @@ def _parse_segment(self, seg_data):
10097
group_control_number = seg_data.get_value('GS06')
10198
if group_control_number in self.gs_ids:
10299
err_str = 'GS Interchange Control Number '
103-
err_str += '%s not unique within file' \
104-
% (group_control_number)
100+
err_str += '%s not unique within file' % (group_control_number)
105101
self._gs_error('6', err_str)
106102
self.gs_count += 1
107103
self.gs_ids.append(group_control_number)
@@ -114,8 +110,7 @@ def _parse_segment(self, seg_data):
114110
transaction_control_number = seg_data.get_value('ST02')
115111
if transaction_control_number in self.st_ids:
116112
err_str = 'ST Interchange Control Number '
117-
err_str += '%s not unique within file' \
118-
% (transaction_control_number)
113+
err_str += '%s not unique within file' % (transaction_control_number)
119114
self._st_error('23', err_str)
120115
self.st_count += 1
121116
self.st_ids.append(transaction_control_number)
@@ -135,14 +130,12 @@ def _parse_segment(self, seg_data):
135130
#raise pyx12.errors.X12Error, \
136131
# 'My HL count %i does not match your HL count %s' \
137132
# % (self.hl_count, seg[1])
138-
err_str = 'My HL count %i does not match your HL count %s' \
139-
% (self.hl_count, hl_count)
133+
err_str = 'My HL count %i does not match your HL count %s' % (self.hl_count, hl_count)
140134
self._seg_error('HL1', err_str)
141135
if seg_data.get_value('HL02') != '':
142136
hl_parent = self._int(seg_data.get_value('HL02'))
143137
if hl_parent not in self.hl_stack:
144-
err_str = 'HL parent (%i) is not a valid parent' \
145-
% (hl_parent)
138+
err_str = 'HL parent (%i) is not a valid parent' % (hl_parent)
146139
self._seg_error('HL2', err_str)
147140
while self.hl_stack and hl_parent != self.hl_stack[-1]:
148141
del self.hl_stack[-1]
@@ -324,8 +317,7 @@ def __init__(self, src_file_obj):
324317
self.raw = RawX12File(self.fd_in)
325318
except pyx12.errors.X12Error:
326319
raise
327-
(seg_term, ele_term, subele_term, eol,
328-
repetition_term) = self.raw.get_term()
320+
(seg_term, ele_term, subele_term, eol, repetition_term) = self.raw.get_term()
329321
self.seg_term = seg_term
330322
self.ele_term = ele_term
331323
self.subele_term = subele_term
@@ -401,8 +393,7 @@ def __iter__(self):
401393
# We have not yet incremented cur_line
402394
if line[-1] == self.ele_term:
403395
err_str = 'Segment contains trailing element terminators'
404-
self._seg_error('SEG1', err_str, None,
405-
src_line=self.cur_line + 1)
396+
self._seg_error('SEG1', err_str, None, src_line=self.cur_line + 1)
406397
seg_data = pyx12.segment.Segment(line, self.seg_term, self.ele_term, self.subele_term)
407398
self._parse_segment(seg_data)
408399
yield(seg_data)
@@ -563,8 +554,7 @@ def _write_segment(self, seg_data):
563554
@param seg_data: segment to write
564555
@type seg_data: L{segment<segment.Segment>}
565556
"""
566-
out = seg_data.format(
567-
self.seg_term, self.ele_term, self.subele_term) + self.eol
557+
out = seg_data.format(self.seg_term, self.ele_term, self.subele_term) + self.eol
568558
self.fd_out.write(out.decode('ascii'))
569559

570560
def _write_isa_segment(self, seg_data):

0 commit comments

Comments
 (0)