Skip to content

Commit c3ba2e0

Browse files
Bonflintstone and janz93
committed
Better statement separating regex
Split on lines that start with `-` and are preceded by at least one newline, while allowing an arbitrary number of newlines before and after. The dash can be followed by anything (we have seen both '}' and 'ä' in real examples). Co-authored-by: Jan von Magnus <[email protected]>
1 parent 1f4021e commit c3ba2e0

File tree

5 files changed

+192
-1
lines changed

5 files changed

+192
-1
lines changed

lib/cmxl.rb

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,11 @@ def self.config
1111
@config
1212
end
1313
@config = {
14-
statement_separator: /\n-\s*\n/m,
14+
# One or more newlines
15+
# followed by `-` at the beginning of a line.
16+
# "Eats up" but does not require characters until the end of the line and more newlines.
17+
# \R is a platform independent newline but in the negated group `[^\n\r]` that did not seem to work.
18+
statement_separator: /\R+-[^\n\r]*\R*/m,
1519
raise_line_format_errors: true,
1620
strip_headers: false
1721
}

spec/fixtures/mt940-abnamro.txt

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
ABNANL2A
2+
940
3+
ABNANL2A
4+
:20:ABN AMRO BANK NV
5+
:25:517852257
6+
:28:19321/1
7+
:60F:C110522EUR3236,28
8+
:61:1105240524D9,N192NONREF
9+
:86:GIRO 428428 KPN - DIGITENNE BETALINGSKENM. 000000042188659
10+
5314606715 BETREFT FACTUUR D.D. 20-05-2011
11+
INCL. 1,44 BTW
12+
:61:1105210523D11,59N426NONREF
13+
:86:BEA NR:XXX1234 21.05.11/12.54 DIRCKIII FIL2500 KATWIJK,PAS999
14+
:61:1105230523D11,63N426NONREF
15+
:86:BEA NR:XXX1234 23.05.11/09.08 DIGROS FIL1015 KATWIJK Z,PAS999
16+
:61:1105220523D11,8N426NONREF
17+
:86:BEA NR:XXX1234 22.05.11/14.25 MC DONALDS A44 LEIDEN,PAS999
18+
:61:1105210523D13,45N426NONREF
19+
:86:BEA NR:XXX1234 21.05.11/12.09 PRINCE FIL. 55 KATWIJK Z,PAS999
20+
:61:1105210523D15,49N426NONREF
21+
:86:BEA NR:XXX1234 21.05.11/12.55 DIRX FIL6017 KATWIJK ZH ,PAS999
22+
23+
:61:1105210523D107,N426NONREF
24+
:86:BEA NR:XXX1234 21.05.11/12.04 HANS ANDERS OPT./056 KAT,PAS999
25+
:61:1105220523D141,48N426NONREF
26+
:86:BEA NR:XXX1234 22.05.11/13.45 MYCOM DEN HAAG S-GRAVEN,PAS999
27+
:62F:C110523EUR876,84
28+
29+
-
30+
31+
ABNANL2A
32+
940
33+
ABNANL2A
34+
:20:ABN AMRO BANK NV
35+
:25:517852257
36+
:28:19322/1
37+
:60F:C110523EUR2876,84
38+
:61:1105240524D9,49N426NONREF
39+
:86:BEA NR:XXX1234 24.05.11/09.18 PETS PLACE KATWIJK KATWI,PAS999
40+
:61:1105240524D15,N426NONREF
41+
:86:52.89.39.882 MYCOM DEN HAAG S-GRAVEN,PAS999
42+
:62F:C110524EUR1849,75
43+
44+
-
spec/fixtures/mt940-handelsbank.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
ä4:
2+
:20:5566778899100112
3+
:25:10020030/1234567
4+
:28C:188/1
5+
:60F:C130928SEK0,
6+
:62F:C130930SEK0,
7+
:64:C130930SEK0,
8+
9+
ä4:
10+
:20:5566778899100169
11+
:25:10020030/1234567
12+
:28C:188/1
13+
:60F:C130928SEK0,
14+
:62F:C130930SEK0,
15+
:64:C130930SEK0,
16+
spec/fixtures/mt940-windows-line-breaks.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
{1:D02AASDISLNETAXXXXXXXXXXXXX}
2+
{2:E623XXXXXXXXAXXXN}
3+
{4:
4+
:20:1234567
5+
:21:9876543210
6+
:25:10020030/1234567
7+
:28C:5/1
8+
:60F:C160314EUR2187,95
9+
:61:0211011102DR800,NSTONONREF//55555
10+
:86:008?00DAUERAUFTRAG?100599?20Miete November?3010020030?31234567?32MUELLER?34339
11+
:61:0211021102CR3000,NTRFNONREF//55555
12+
:86:051?00UEBERWEISUNG?100599?20Gehalt Oktober?21Firma
13+
Mustermann GmbH?3050060400?310847564700?32MUELLER?34339
14+
:62F:C160315EUR4387,95
15+
:86:Some random data
16+
-}

spec/mt940_parsing_spec.rb

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,4 +95,115 @@
9595
expect(subject[0].transactions.count).to eql(1)
9696
end
9797
end
98+
99+
describe "MT940 abnamro" do
100+
it "splits the file into on statement with the headers included" do
101+
expected_data = <<~MT940.chomp
102+
ABNANL2A
103+
940
104+
ABNANL2A
105+
:20:ABN AMRO BANK NV
106+
:25:517852257
107+
:28:19321/1
108+
:60F:C110522EUR3236,28
109+
:61:1105240524D9,N192NONREF
110+
:86:GIRO 428428 KPN - DIGITENNE BETALINGSKENM. 000000042188659
111+
5314606715 BETREFT FACTUUR D.D. 20-05-2011
112+
INCL. 1,44 BTW
113+
:61:1105210523D11,59N426NONREF
114+
:86:BEA NR:XXX1234 21.05.11/12.54 DIRCKIII FIL2500 KATWIJK,PAS999
115+
:61:1105230523D11,63N426NONREF
116+
:86:BEA NR:XXX1234 23.05.11/09.08 DIGROS FIL1015 KATWIJK Z,PAS999
117+
:61:1105220523D11,8N426NONREF
118+
:86:BEA NR:XXX1234 22.05.11/14.25 MC DONALDS A44 LEIDEN,PAS999
119+
:61:1105210523D13,45N426NONREF
120+
:86:BEA NR:XXX1234 21.05.11/12.09 PRINCE FIL. 55 KATWIJK Z,PAS999
121+
:61:1105210523D15,49N426NONREF
122+
:86:BEA NR:XXX1234 21.05.11/12.55 DIRX FIL6017 KATWIJK ZH ,PAS999
123+
124+
:61:1105210523D107,N426NONREF
125+
:86:BEA NR:XXX1234 21.05.11/12.04 HANS ANDERS OPT./056 KAT,PAS999
126+
:61:1105220523D141,48N426NONREF
127+
:86:BEA NR:XXX1234 22.05.11/13.45 MYCOM DEN HAAG S-GRAVEN,PAS999
128+
:62F:C110523EUR876,84
129+
MT940
130+
stub = instance_double(Cmxl::Statement)
131+
allow(Cmxl::Statement).to receive(:new).and_return(stub)
132+
133+
Cmxl.parse(mt940_file('mt940-abnamro'))
134+
135+
expect(Cmxl::Statement).to have_received(:new).with(expected_data)
136+
end
137+
138+
it 'splits the file into two statements' do
139+
allow(Cmxl::Statement).to receive(:new)
140+
141+
Cmxl.parse(mt940_file('mt940-abnamro'))
142+
143+
expect(Cmxl::Statement).to have_received(:new).twice
144+
end
145+
end
146+
147+
describe 'MT940 handelsbank' do
148+
it 'splits the file with the special characters correctly' do
149+
expected_data = <<~MT940.chomp
150+
ä4:
151+
:20:5566778899100112
152+
:25:10020030/1234567
153+
:28C:188/1
154+
:60F:C130928SEK0,
155+
:62F:C130930SEK0,
156+
:64:C130930SEK0,
157+
MT940
158+
stub = instance_double(Cmxl::Statement)
159+
allow(Cmxl::Statement).to receive(:new).and_return(stub)
160+
161+
Cmxl.parse(mt940_file('mt940-handelsbank'))
162+
163+
expect(Cmxl::Statement).to have_received(:new).with(expected_data)
164+
end
165+
166+
it 'splits the file into two statements' do
167+
allow(Cmxl::Statement).to receive(:new)
168+
169+
Cmxl.parse(mt940_file('mt940-handelsbank'))
170+
171+
expect(Cmxl::Statement).to have_received(:new).twice
172+
end
173+
end
174+
175+
describe "MT940 windows line breaks" do
176+
it 'splits the file with the special characters correctly' do
177+
expected_data =
178+
"{1:D02AASDISLNETAXXXXXXXXXXXXX}\r\n"\
179+
"{2:E623XXXXXXXXAXXXN}\r\n"\
180+
"{4:\r\n"\
181+
":20:1234567\r\n"\
182+
":21:9876543210\r\n"\
183+
":25:10020030/1234567\r\n"\
184+
":28C:5/1\r\n"\
185+
":60F:C160314EUR2187,95\r\n"\
186+
":61:0211011102DR800,NSTONONREF//55555\r\n"\
187+
":86:008?00DAUERAUFTRAG?100599?20Miete November?3010020030?31234567?32MUELLER?34339\r\n"\
188+
":61:0211021102CR3000,NTRFNONREF//55555\r\n"\
189+
":86:051?00UEBERWEISUNG?100599?20Gehalt Oktober?21Firma\r\n"\
190+
"Mustermann GmbH?3050060400?310847564700?32MUELLER?34339\r\n"\
191+
":62F:C160315EUR4387,95\r\n"\
192+
":86:Some random data"
193+
stub = instance_double(Cmxl::Statement)
194+
allow(Cmxl::Statement).to receive(:new).and_return(stub)
195+
196+
Cmxl.parse(mt940_file('mt940-windows-line-breaks'))
197+
198+
expect(Cmxl::Statement).to have_received(:new).with(expected_data)
199+
end
200+
201+
it 'splits the file into two statements' do
202+
allow(Cmxl::Statement).to receive(:new)
203+
204+
Cmxl.parse(mt940_file('mt940-windows-line-breaks'))
205+
206+
expect(Cmxl::Statement).to have_received(:new).once
207+
end
208+
end
98209
end

0 commit comments

Comments
 (0)