Skip to content

Commit 86df7b6

Browse files
committed
finalized bond order prediction for PDB and MMCIF files; added new simple MMCIFParser
1 parent 9252a51 commit 86df7b6

File tree

7 files changed

+181
-12
lines changed

7 files changed

+181
-12
lines changed

src/main/java/com/actelion/research/chem/io/CompoundFileHelper.java

Lines changed: 39 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
import java.util.ArrayList;
4444

4545
public abstract class CompoundFileHelper {
46-
public static final int cFileTypeMask = 0x007FFFFF;
46+
public static final int cFileTypeMask = 0x00FFFFFF;
4747
public static final int cFileTypeDataWarrior = 0x00000001;
4848
public static final int cFileTypeDataWarriorTemplate = 0x00000002;
4949
public static final int cFileTypeDataWarriorQuery = 0x00000004;
@@ -70,9 +70,10 @@ public abstract class CompoundFileHelper {
7070
public static final int cFileTypeMOL = 0x00040000;
7171
public static final int cFileTypeMOL2 = 0x00080000;
7272
public static final int cFileTypePDB = 0x00100000;
73-
public static final int cFileTypeMMTF = 0x00200000;
74-
public static final int cFileTypeProtein = cFileTypePDB | cFileTypeMMTF;
75-
public static final int cFileTypeSDGZ = 0x00400000;
73+
public static final int cFileTypeMMCIF = 0x00200000;
74+
public static final int cFileTypeMMTF = 0x00400000;
75+
public static final int cFileTypeProtein = cFileTypePDB | cFileTypeMMCIF | cFileTypeMMTF;
76+
public static final int cFileTypeSDGZ = 0x00800000;
7677
public static final int cFileTypeUnknown = -1;
7778
public static final int cFileTypeDirectory = -2;
7879

@@ -375,9 +376,22 @@ public boolean accept(File f) {
375376
if (filetypes == cFileTypePictureFile) {
376377
filter.setDescription("Image files");
377378
}
378-
if ((filetypes & cFileTypePDB) != 0) {
379+
if ((filetypes & cFileTypePDB) != 0 && (filetypes & cFileTypeMMCIF) != 0) {
379380
filter.addExtension("pdb");
380-
filter.addDescription("Protein Data Bank files");
381+
filter.addExtension("cif");
382+
filter.addExtension("mmcif");
383+
filter.addDescription("PDB/MMCIF Protein Data Bank files");
384+
}
385+
else {
386+
if ((filetypes & cFileTypePDB) != 0) {
387+
filter.addExtension("pdb");
388+
filter.addDescription("Classical Protein Data Bank files");
389+
}
390+
if ((filetypes & cFileTypeMMCIF) != 0) {
391+
filter.addExtension("cif");
392+
filter.addExtension("mmcif");
393+
filter.addDescription("MMCIF Protein Data Bank files");
394+
}
381395
}
382396
if ((filetypes & cFileTypeMMTF) != 0) {
383397
filter.addExtension("mmtf");
@@ -452,8 +466,13 @@ public static String removeExtension(String filePath) {
452466
}
453467

454468
/**
455-
* Note: If
456-
* @param filename
469+
* @return one or multiple file types that match the extension of the given file's name
470+
*/
471+
public static int getFileType(File file) {
472+
return file == null ? cFileTypeUnknown : getFileType(file.getName());
473+
}
474+
475+
/**
457476
* @return one or multiple file types that match the extension of the given filename
458477
*/
459478
public static int getFileType(String filename) {
@@ -499,6 +518,8 @@ public static int getFileType(String filename) {
499518
return cFileTypeMOL2;
500519
if (extension.equals(".pdb"))
501520
return cFileTypePDB;
521+
if (extension.equals(".cif") || extension.equals(".mmcif"))
522+
return cFileTypeMMCIF;
502523
if (extension.equals(".mmtf"))
503524
return cFileTypeMMTF;
504525

@@ -599,9 +620,19 @@ public static String[] getExtensions(int filetype) {
599620
case cFileTypePDB:
600621
extensions.add(".pdb");
601622
break;
623+
case cFileTypeMMCIF:
624+
extensions.add(".cif");
625+
extensions.add(".mmcif");
626+
break;
602627
case cFileTypeMMTF:
603628
extensions.add(".mmtf");
604629
break;
630+
case cFileTypeProtein:
631+
extensions.add(".pdb");
632+
extensions.add(".cif");
633+
extensions.add(".mmcif");
634+
extensions.add(".mmtf");
635+
break;
605636
case cFileTypeSDGZ:
606637
extensions.add(".sdf.gz");
607638
break;

src/main/java/com/actelion/research/chem/io/pdb/converter/BondOrderCalculator.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,13 +57,12 @@ public class BondOrderCalculator {
5757
private static final double MIN_DIF_SINGLE_TO_AROM_BOND_LENGTH = 0.05;
5858
private static final double MIN_DIF_DOUBLE_TO_TRIPLE_BOND_LENGTH = 0.08;
5959
private static final double AROMATIC_5RING_BOND_LENGTH_TOLERANCE = 0.07; // larger values -> more aromatic 5-rings
60-
private static final double AROMATIC_5RING_MIN_BOND_LENGTH_SCORE = 1.0;
6160
private static final double RING_BOND_AROMATICITY_LIMIT = 0.25;
6261
private static final double SP_BOND_LENGTH_SUM_TOLERANCE = 0.1;
6362
private static final double SP_BOND_LENGTH_SUM_CONTRIBUTION_FACTOR = 10;
6463
private static final double PROPARGYL_LIKELYHOOD_INCREASE = 0.6;
6564
private static final double PATH_START_AND_END_TOLERANCE = 0.1;
66-
private static final double CHINONE_CONVERSION_MINIMUM_SCORE = 0.02;
65+
private static final double CHINONE_CONVERSION_MINIMUM_SCORE = 0.02; // was 0.02
6766
private static final double HETERO_ATOM_CHARGE_PENALTY = 0.1;
6867
private static final double DELOCALIZED_ZERO_PI_NITROGEN_MALUS = 0.05;
6968
private static final double ENOL_TAUTOMER_MALUS = 0.2;
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
package com.actelion.research.chem.io.pdb.mmcif;
2+
3+
import com.actelion.research.chem.io.pdb.parser.AtomRecord;
4+
import com.actelion.research.chem.io.pdb.parser.PDBCoordEntryFile;
5+
import com.actelion.research.util.SortedList;
6+
7+
import java.io.*;
8+
import java.net.URI;
9+
import java.net.URLConnection;
10+
import java.nio.charset.StandardCharsets;
11+
import java.util.ArrayList;
12+
import java.util.TreeSet;
13+
import java.util.zip.GZIPInputStream;
14+
15+
public class MMCIFParser {
16+
public static PDBCoordEntryFile getFromPDB(String pdbID) throws Exception {
17+
URLConnection con = new URI("https://files.rcsb.org/download/"+pdbID+".cif.gz").toURL().openConnection();
18+
return MMCIFParser.parse(new BufferedReader(new InputStreamReader(new GZIPInputStream(con.getInputStream()))));
19+
}
20+
21+
public static PDBCoordEntryFile parse(String filename) throws IOException {
22+
return parse(new File(filename));
23+
}
24+
25+
public static PDBCoordEntryFile parse(File file) throws IOException {
26+
InputStream stream = file.getName().toLowerCase().endsWith(".cif.gz")
27+
|| file.getName().toLowerCase().endsWith(".mmcif.gz") ?
28+
new GZIPInputStream(new FileInputStream(file)) : new FileInputStream(file);
29+
return parse(new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)));
30+
}
31+
32+
public static PDBCoordEntryFile parse(BufferedReader reader) throws IOException {
33+
TreeSet<AtomRecord> proteinAtoms = new TreeSet<>();
34+
TreeSet<AtomRecord> hetAtoms = new TreeSet<>();
35+
36+
String line;
37+
while ((line = reader.readLine()) != null) {
38+
line = line.trim();
39+
if (line.equals("loop_")) {
40+
MMCIFTable table = new MMCIFTable(reader);
41+
if (table.getName().equals("_atom_site")) {
42+
String[] row;
43+
while ((row = table.parseRow(reader)) != null) {
44+
AtomRecord atomRecord = new AtomRecord(
45+
Integer.parseInt(row[table.getIndex("id")]),
46+
row[table.getIndex("label_atom_id")],
47+
row[table.getIndex("label_alt_id")],
48+
row[table.getIndex("label_comp_id")],
49+
row[table.getIndex("label_asym_id")],
50+
parseInt(row[table.getIndex("label_seq_id")]),
51+
row[table.getIndex("pdbx_PDB_ins_code")],
52+
Double.parseDouble(row[table.getIndex("Cartn_x")]),
53+
Double.parseDouble(row[table.getIndex("Cartn_y")]),
54+
Double.parseDouble(row[table.getIndex("Cartn_z")]),
55+
Double.parseDouble(row[table.getIndex("occupancy")]),
56+
Double.parseDouble(row[table.getIndex("B_iso_or_equiv")]),
57+
row[table.getIndex("type_symbol")]);
58+
String group = row[table.getIndex("group_PDB")];
59+
if (group.equals("ATOM"))
60+
proteinAtoms.add(atomRecord);
61+
if (group.equals("HETATM"))
62+
hetAtoms.add(atomRecord);
63+
}
64+
}
65+
}
66+
}
67+
68+
ArrayList<AtomRecord> protAtomList = new ArrayList<>(proteinAtoms);
69+
ArrayList<AtomRecord> hetAtomList = new ArrayList<>(hetAtoms);
70+
71+
PDBCoordEntryFile entryFile = new PDBCoordEntryFile();
72+
entryFile.setProtAtomRecords(protAtomList);
73+
entryFile.setHetAtomRecords(hetAtomList);
74+
entryFile.setLiConnect(new SortedList<>());
75+
return entryFile;
76+
}
77+
78+
private static int parseInt(String s) {
79+
return s.equals(".") ? 0 : Integer.parseInt(s);
80+
}
81+
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
package com.actelion.research.chem.io.pdb.mmcif;
2+
3+
import java.io.BufferedReader;
4+
import java.io.IOException;
5+
import java.util.ArrayList;
6+
import java.util.TreeMap;
7+
8+
/**
 * Reader for one <code>loop_</code> table of an mmCIF file. The constructor consumes the
 * header lines (<code>_tablename.columnname</code>) that follow the <code>loop_</code> keyword;
 * parseRow() then delivers one data row per call. The table always holds one line of
 * look-ahead, i.e. the line following the headers has already been read from the reader
 * when the constructor returns.
 */
public class MMCIFTable {
	private String mName,mLine;
	private final String[] mHeaderName;
	private final TreeMap<String,Integer> mHeaderNameToIndexMap;

	/**
	 * Reads all consecutive lines starting with '_' as column headers of this table.
	 * The reader is expected to be positioned right behind a 'loop_' line.
	 * If the reader is at its end or the next line is not a header line, then the table
	 * has no columns and getName() returns null.
	 * @param reader source positioned at the first header line
	 * @throws IOException if reading fails, a header has no or multiple '.', or the headers don't share one table name
	 */
	public MMCIFTable(BufferedReader reader) throws IOException {
		mHeaderNameToIndexMap = new TreeMap<>();
		ArrayList<String> list = new ArrayList<>();
		mLine = reader.readLine();
		// readLine() returns null at the end of the input, e.g. if a file is truncated behind 'loop_'
		while (mLine != null && mLine.startsWith("_")) {
			mLine = mLine.trim();
			int index = mLine.indexOf('.');
			if (index == -1)
				throw new IOException("Missing '.' in table header line.");
			if (mName == null)
				mName = mLine.substring(0, index);
			else if (!mName.equals(mLine.substring(0, index)))
				throw new IOException("Inconsistent prefix in table header line.");
			if (mLine.indexOf('.', index+1) != -1)
				throw new IOException("Multiple '.' found in table header line.");
			String headerName = mLine.substring(index+1);
			mHeaderNameToIndexMap.put(headerName, list.size());
			list.add(headerName);
			mLine = reader.readLine();
		}
		mHeaderName = list.toArray(new String[0]);
	}

	/**
	 * @return table name including the leading '_' (the header part before the '.'); null if no header line was found
	 */
	public String getName() {
		return mName;
	}

	/**
	 * @return column names (the header parts behind the '.') in file order; this is the internal array, not a copy
	 */
	public String[] getHeaderNames() {
		return mHeaderName;
	}

	/**
	 * @param headerName column name without table prefix
	 * @return zero-based position of the column within a row
	 * @throws IllegalArgumentException if this table has no column with the given name
	 */
	public int getIndex(String headerName) {
		Integer index = mHeaderNameToIndexMap.get(headerName);
		// explicit check instead of an anonymous NullPointerException from auto-unboxing
		if (index == null)
			throw new IllegalArgumentException("Column '"+headerName+"' not found in table '"+mName+"'.");
		return index;
	}

	/**
	 * Returns the values of the current row and reads the following line as new look-ahead.
	 * Values enclosed in single or double quotes are returned without their quotes and may contain white space.
	 * Multi-line values (text fields starting with ';') are not supported.
	 * @param reader the same reader that was passed to the constructor
	 * @return values of the row in column order; null if the table end ('#' line) or the end of the input was reached
	 * @throws IOException if reading fails or the row's value count differs from the column count
	 */
	public String[] parseRow(BufferedReader reader) throws IOException {
		if (mLine == null)	// end of input without terminating '#' line
			return null;
		String line = mLine.trim();
		if (line.startsWith("#"))
			return null;
		String[] row = tokenize(line);
		if (row.length != mHeaderName.length)
			throw new IOException("Inconsistent length of row entries ("+row.length+") and table headers ("+mHeaderName.length+").");
		mLine = reader.readLine();
		return row;
	}

	/**
	 * Splits a row into white space separated values. A value that starts with ' or "
	 * extends to the next occurrence of the same quote character that is followed by
	 * white space or the line end; the enclosing quotes are not part of the value.
	 */
	private static String[] tokenize(String line) {
		ArrayList<String> tokens = new ArrayList<>();
		int length = line.length();
		int pos = 0;
		while (pos < length) {
			char c = line.charAt(pos);
			if (Character.isWhitespace(c)) {
				pos++;
				continue;
			}
			int end;
			if (c == '\'' || c == '"') {
				end = pos + 1;
				// A quote inside the value (as in O5') doesn't close it unless white space or the line end follows.
				while (end < length
					&& !(line.charAt(end) == c && (end + 1 == length || Character.isWhitespace(line.charAt(end + 1)))))
					end++;
				tokens.add(line.substring(pos + 1, end));
				pos = end + 1;
			}
			else {
				end = pos;
				while (end < length && !Character.isWhitespace(line.charAt(end)))
					end++;
				tokens.add(line.substring(pos, end));
				pos = end;
			}
		}
		return tokens.toArray(new String[0]);
	}
}

src/main/java/com/actelion/research/chem/io/pdb/parser/PDBFileParser.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,7 @@ public class PDBFileParser {
194194

195195
private final ModelParser modelParser;
196196

197+
@Deprecated // use MMCIFParser.getFromPDB()
197198
public PDBCoordEntryFile getFromPDB(String pdbID) throws Exception {
198199
URLConnection con = new URI("https://files.rcsb.org/download/"+pdbID+".pdb.gz").toURL().openConnection();
199200
return new PDBFileParser().parse(new BufferedReader(new InputStreamReader(new GZIPInputStream(con.getInputStream()))));

src/main/java/com/actelion/research/gui/JChemistryView.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -421,7 +421,7 @@ private void updateBorder(boolean showBorder) {
421421
try {
422422
Reaction reaction = null;
423423

424-
if (FileHelper.getFileType(rxnFile.getName()) == FileHelper.cFileTypeRXN) {
424+
if (FileHelper.getFileType(rxnFile) == FileHelper.cFileTypeRXN) {
425425
reaction = new RXNFileParser().getReaction(rxnFile);
426426
}
427427
else {

src/main/java/com/actelion/research/gui/editor/GenericEditorArea.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -953,7 +953,7 @@ private void openReaction() {
953953
try {
954954
Reaction reaction = null;
955955

956-
if (FileHelper.getFileType(rxnFile.getName()) == FileHelper.cFileTypeRXN) {
956+
if (FileHelper.getFileType(rxnFile) == FileHelper.cFileTypeRXN) {
957957
reaction = new RXNFileParser().getReaction(rxnFile);
958958
} else {
959959
RDFileParser rdfParser = new RDFileParser(rxnFile);

0 commit comments

Comments
 (0)