Skip to content

Commit ea78fcc

Browse files
authored
feat: support RGROUPS in Molfile V3000 (#375)
1 parent 2c86ee6 commit ea78fcc

File tree

7 files changed

+313
-10
lines changed

7 files changed

+313
-10
lines changed
Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
import { expect, test } from 'vitest';
2+
3+
import { Molecule } from '../lib';
4+
5+
// Parses a V3000 molfile whose first atom is an `R#` pseudo-atom declared with
// `RGROUPS=(1 0)` and checks how that atom is exposed on the parsed Molecule.
test('should parse molfile with RGROUPS 1', () => {
6+
const molfile = `Spiramycin
7+
Actelion Java MolfileCreator 2.0
8+
9+
0 0 0 0 0 0 0 V3000
10+
M V30 BEGIN CTAB
11+
M V30 COUNTS 60 63 0 0 0
12+
M V30 BEGIN ATOM
13+
M V30 1 R# 0.0 0.0 0.0 0 RGROUPS=(1 0)
14+
M V30 2 O -0.0157 1.5237 0.0 0
15+
M V30 3 C -1.3666 2.3406 0.0 0 CFG=1
16+
M V30 4 C -1.3823 3.8644 0.0 0 CFG=2
17+
M V30 5 C -2.7176 1.6651 0.0 0
18+
M V30 6 O -2.1521 5.2154 0.0 0
19+
M V30 7 C -0.3455 4.964 0.0 0 CFG=1
20+
M V30 8 C -4.0372 2.4663 0.0 0
21+
M V30 9 C -2.9218 6.5663 0.0 0
22+
M V30 10 O 0.8797 4.2571 0.0 0
23+
M V30 11 C -0.377 6.4878 0.0 0 CFG=2
24+
M V30 12 O -4.0529 3.9901 0.0 0
25+
M V30 13 O -5.3882 1.8065 0.0 0
26+
M V30 14 C 2.2778 3.9115 0.0 0
27+
M V30 15 C -1.6965 7.289 0.0 0
28+
M V30 16 C 0.8797 7.2575 0.0 0
29+
M V30 17 C -6.7234 2.6234 0.0 0 CFG=1
30+
M V30 18 O 3.1889 3.2674 0.0 0
31+
M V30 19 C 3.723 4.0843 0.0 0
32+
M V30 20 C -1.7122 8.8285 0.0 0 CFG=2
33+
M V30 21 C 2.1207 6.5506 0.0 0
34+
M V30 22 C -6.7391 4.1471 0.0 0
35+
M V30 23 C -8.0901 1.9636 0.0 0
36+
M V30 24 C 3.5659 1.6023 0.0 0
37+
M V30 25 O 4.3985 5.4039 0.0 0
38+
M V30 26 C 4.0529 2.482 0.0 0
39+
M V30 27 C -3.0475 9.6139 0.0 0 CFG=1
40+
M V30 28 C -0.4555 9.5825 0.0 0
41+
M V30 29 O 3.3774 7.3047 0.0 0
42+
M V30 30 C -5.4824 4.8855 0.0 0
43+
M V30 31 C 5.0111 1.7594 0.0 0
44+
M V30 32 C 2.7805 0.377 0.0 0
45+
M V30 33 N 5.451 2.9218 0.0 0
46+
M V30 34 O -3.0789 11.1534 0.0 0
47+
M V30 35 C -4.4142 8.9541 0.0 0
48+
M V30 36 C -5.5138 6.425 0.0 0
49+
M V30 37 O 5.7023 0.5341 0.0 0
50+
M V30 38 C 6.1265 4.2571 0.0 0
51+
M V30 39 C 6.8491 2.5762 0.0 0
52+
M V30 40 C -2.0421 12.253 0.0 0
53+
M V30 41 C -4.477 7.5246 0.0 0
54+
M V30 42 C 7.1476 0.5812 0.0 0
55+
M V30 43 O -0.9268 12.6615 0.0 0
56+
M V30 44 C -1.3509 13.5726 0.0 0
57+
M V30 45 O 7.6189 -1.021 0.0 0
58+
M V30 46 C 8.3886 -0.1099 0.0 0
59+
M V30 47 C 0.6754 12.0959 0.0 0
60+
M V30 48 C 0.1885 13.0071 0.0 0
61+
M V30 49 C 9.5982 -0.6911 0.0 0
62+
M V30 50 C 10.2737 0.2513 0.0 0
63+
M V30 51 C 1.3666 13.4155 0.0 0
64+
M V30 52 C 1.9007 11.4047 0.0 0
65+
M V30 53 C 10.745 -1.3509 0.0 0
66+
M V30 54 C 9.5354 -2.1207 0.0 0
67+
M V30 55 O 10.2109 1.7751 0.0 0
68+
M V30 56 C 11.7189 0.2827 0.0 0
69+
M V30 57 N 2.6234 14.1695 0.0 0
70+
M V30 58 O 12.1431 -0.9268 0.0 0
71+
M V30 59 C 3.6602 15.2692 0.0 0
72+
M V30 60 C 4.0215 13.8239 0.0 0
73+
M V30 END ATOM
74+
M V30 BEGIN BOND
75+
M V30 1 1 1 2
76+
M V30 2 1 3 2 CFG=3
77+
M V30 3 1 3 4
78+
M V30 4 1 3 5
79+
M V30 5 1 4 6 CFG=3
80+
M V30 6 1 4 7
81+
M V30 7 1 5 8
82+
M V30 8 1 6 9
83+
M V30 9 1 7 10 CFG=3
84+
M V30 10 1 7 11
85+
M V30 11 2 8 12
86+
M V30 12 1 8 13
87+
M V30 13 1 10 14
88+
M V30 14 1 11 15
89+
M V30 15 1 11 16 CFG=3
90+
M V30 16 1 13 17
91+
M V30 17 1 14 18
92+
M V30 18 1 14 19
93+
M V30 19 1 15 20
94+
M V30 20 1 16 21
95+
M V30 21 1 17 22
96+
M V30 22 1 17 23 CFG=3
97+
M V30 23 1 18 24
98+
M V30 24 1 19 25
99+
M V30 25 1 19 26
100+
M V30 26 1 20 27
101+
M V30 27 1 20 28 CFG=3
102+
M V30 28 2 21 29
103+
M V30 29 1 22 30
104+
M V30 30 1 24 31
105+
M V30 31 1 24 32
106+
M V30 32 1 26 33
107+
M V30 33 1 27 34 CFG=3
108+
M V30 34 1 27 35
109+
M V30 35 2 30 36
110+
M V30 36 1 31 37
111+
M V30 37 1 33 38
112+
M V30 38 1 33 39
113+
M V30 39 1 34 40
114+
M V30 40 2 35 41
115+
M V30 41 1 37 42
116+
M V30 42 1 40 43
117+
M V30 43 1 40 44
118+
M V30 44 1 42 45
119+
M V30 45 1 42 46
120+
M V30 46 1 43 47
121+
M V30 47 1 44 48
122+
M V30 48 1 45 49
123+
M V30 49 1 46 50
124+
M V30 50 1 47 51
125+
M V30 51 1 47 52
126+
M V30 52 1 49 53
127+
M V30 53 1 49 54
128+
M V30 54 1 50 55
129+
M V30 55 1 50 56
130+
M V30 56 1 51 57
131+
M V30 57 1 53 58
132+
M V30 58 1 57 59
133+
M V30 59 1 57 60
134+
M V30 60 1 26 31
135+
M V30 61 1 36 41
136+
M V30 62 1 48 51
137+
M V30 63 1 50 53
138+
M V30 END BOND
139+
M V30 BEGIN COLLECTION
140+
M V30 MDLV30/STEABS ATOMS=(7 3 4 7 11 17 20 27)
141+
M V30 END COLLECTION
142+
M V30 END CTAB
143+
M END
144+
`;
145+
146+
const molecule = Molecule.fromMolfile(molfile);
147+
// The COUNTS line above declares 60 atoms; all of them must be present.
expect(molecule.getAllAtoms()).toBe(60);
148+
149+
// Atom index 0 is the R# entry with RGROUPS=(1 0): it is expected to carry the
// custom label 'R0' while reporting atomic number 0 and the '?' atom label.
expect(molecule.getAtomCustomLabel(0)).toBe('R0');
150+
expect(molecule.getAtomicNo(0)).toBe(0);
151+
expect(molecule.getAtomLabel(0)).toBe('?');
152+
});
153+
154+
// Parses a small V3000 molfile (six carbons plus one `R#` atom declared with
// `RGROUPS=(1 1)`) and checks how the R-group atom is exposed after parsing.
test('should parse molfile with RGROUPS 2', () => {
155+
const molfile = `
156+
Mrv2308 02172612172D
157+
158+
0 0 0 0 0 999 V3000
159+
M V30 BEGIN CTAB
160+
M V30 COUNTS 7 7 0 0 0
161+
M V30 BEGIN ATOM
162+
M V30 1 C -1.3959 1.7901 0 0
163+
M V30 2 C -2.7295 1.0201 0 0
164+
M V30 3 C -2.7295 -0.5201 0 0
165+
M V30 4 C -1.3959 -1.2901 0 0
166+
M V30 5 C -0.0622 -0.5201 0 0
167+
M V30 6 C -0.0622 1.0201 0 0
168+
M V30 7 R# -1.396 3.3301 0 0 RGROUPS=(1 1)
169+
M V30 END ATOM
170+
M V30 BEGIN BOND
171+
M V30 1 2 1 2
172+
M V30 2 1 2 3
173+
M V30 3 2 3 4
174+
M V30 4 1 4 5
175+
M V30 5 2 5 6
176+
M V30 6 1 6 1
177+
M V30 7 1 1 7
178+
M V30 END BOND
179+
M V30 END CTAB
180+
M END`;
181+
182+
const molecule = Molecule.fromMolfile(molfile);
183+
// The COUNTS line above declares 7 atoms; all of them must be present.
expect(molecule.getAllAtoms()).toBe(7);
184+
185+
// Atom index 6 is the R# entry with RGROUPS=(1 1): it is expected to carry the
// custom label 'R1' while reporting atomic number 0 and the '?' atom label.
expect(molecule.getAtomCustomLabel(6)).toBe('R1');
186+
expect(molecule.getAtomicNo(6)).toBe(0);
187+
expect(molecule.getAtomLabel(6)).toBe('?');
188+
});

src/com/actelion/research/gwt/jre/java/util/regex/Matcher.java

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,28 @@
11
package java.util.regex;
22

33
import org.cheminfo.utils.JSException;
4+
import org.cheminfo.utils.JSRegexMatcher;
5+
import org.cheminfo.utils.JSRegexMatchResult;
46

57
public class Matcher {
8+
private JSRegexMatcher jsMatcher;
9+
// To simplify emulation for now, `find` can only be called once.
10+
private boolean wasCalled = false;
11+
private JSRegexMatchResult matchResult = null;
12+
13+
/**
 * Creates a matcher that delegates the actual search to the given
 * JavaScript-backed matcher.
 *
 * @param matcher the matcher that {@code find()} will call
 */
public Matcher(JSRegexMatcher matcher) {
  jsMatcher = matcher;
}
16+
617
public boolean find() {
7-
JSException.throwUnimplemented();
8-
return false;
18+
if (wasCalled) {
19+
JSException.throwError("find was already called on this matcher");
20+
return false;
21+
} else {
22+
wasCalled = true;
23+
matchResult = this.jsMatcher.find();
24+
return matchResult != null;
25+
}
926
}
1027

1128
public boolean find(int start) {
@@ -19,7 +36,6 @@ public MatchResult toMatchResult() {
1936
}
2037

2138
public String group(int group) {
22-
JSException.throwUnimplemented();
23-
return null;
39+
return matchResult.group(group);
2440
}
2541
}
Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,27 @@
11
package java.util.regex;
22

33
import org.cheminfo.utils.JSException;
4+
import org.cheminfo.utils.JSRegexMatcher;
5+
import org.cheminfo.utils.JSRegexPattern;
46

57
public class Pattern {
6-
public Pattern() {}
8+
private JSRegexPattern jsPattern;
79

8-
public static Pattern compile(String p) {
9-
JSException.throwUnimplemented();
10-
return null;
10+
private Pattern(JSRegexPattern jsPattern) {
11+
this.jsPattern = jsPattern;
12+
}
13+
14+
public static Pattern compile(String p) throws PatternSyntaxException {
15+
JSRegexPattern jsPattern = JSRegexPattern.compile(p);
16+
if (jsPattern == null) {
17+
throw new PatternSyntaxException("invalid JS RegExp: " + p);
18+
} else {
19+
return new Pattern(jsPattern);
20+
}
1121
}
1222

1323
public Matcher matcher(String input) {
14-
JSException.throwUnimplemented();
15-
return null;
24+
JSRegexMatcher jsMatcher = this.jsPattern.getMatcher(input);
25+
return new Matcher(jsMatcher);
1626
}
1727
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
package java.util.regex;
2+
3+
import java.lang.IllegalArgumentException;
4+
5+
/**
 * Unchecked exception signalling that a regular expression could not be
 * compiled.
 */
public class PatternSyntaxException extends IllegalArgumentException {
  /**
   * @param desc description of the problem; becomes the exception message
   */
  public PatternSyntaxException(String desc) {
    super(desc);
  }
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
package org.cheminfo.utils;
2+
3+
import com.google.gwt.core.client.JavaScriptObject;
4+
5+
public class JSRegexMatchResult {
6+
private JavaScriptObject jsResult;
7+
8+
public JSRegexMatchResult(JavaScriptObject jsResult) {
9+
this.jsResult = jsResult;
10+
}
11+
12+
public String group(int group) {
13+
return groupNative(jsResult, group);
14+
}
15+
16+
public native String groupNative(JavaScriptObject result, int group)
17+
/*-{
18+
return result[group] || null;
19+
}-*/;
20+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
package org.cheminfo.utils;
2+
3+
import com.google.gwt.core.client.JavaScriptObject;
4+
5+
public class JSRegexMatcher {
6+
private JavaScriptObject regex;
7+
private String input;
8+
9+
public JSRegexMatcher(JavaScriptObject regex, String input) {
10+
this.regex = regex;
11+
this.input = input;
12+
}
13+
14+
public JSRegexMatchResult find() {
15+
JavaScriptObject result = findNative(regex, input);
16+
if (result == null) {
17+
return null;
18+
} else {
19+
return new JSRegexMatchResult(result);
20+
}
21+
}
22+
23+
public native JavaScriptObject findNative(JavaScriptObject regex, String input)
24+
/*-{
25+
return regex.exec(input);
26+
}-*/;
27+
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
package org.cheminfo.utils;
2+
3+
import com.google.gwt.core.client.JavaScriptObject;
4+
5+
public class JSRegexPattern {
6+
private JavaScriptObject regex;
7+
8+
private JSRegexPattern(JavaScriptObject regex) {
9+
this.regex = regex;
10+
}
11+
12+
public static JSRegexPattern compile(String pattern) {
13+
JavaScriptObject regex = compileNative(pattern);
14+
if (regex == null) {
15+
return null;
16+
} else {
17+
return new JSRegexPattern(regex);
18+
}
19+
}
20+
21+
private native static JavaScriptObject compileNative(String pattern)
22+
/*-{
23+
try {
24+
return new RegExp(pattern);
25+
} catch (err) {
26+
return null;
27+
}
28+
}-*/;
29+
30+
public JSRegexMatcher getMatcher(String input) {
31+
return new JSRegexMatcher(this.regex, input);
32+
}
33+
}

0 commit comments

Comments
 (0)