Skip to content

Commit 8d8d1fb

Browse files
committed
cache result of getArray() (fixes #13)
1 parent 1c520b1 commit 8d8d1fb

File tree

4 files changed

+33
-8
lines changed

4 files changed

+33
-8
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@ CIFTools Changelog
33

44
This project uses semantic versioning. Furthermore, this project provides code that was generated from schemata. Any schema change that introduces a breaking change in the generated code is considered breaking for the whole project. Additional information is provided below when this occurs (named `Breaking schema changes`). Most of these occur in experimental categories and are unlikely to affect your code. `Breaking API changes` will be avoided starting with version 1.0.0.
55

6+
ciftools-java 7.0.1 - March 2025
7+
-------------
8+
* cache the result of `DelegatingColumn#getArray()` to avoid a performance penalty when the schema type and the actual data type differ (fixes #13)
9+
610
ciftools-java 7.0.0 - March 2025
711
-------------
812
### Breaking schema changes

src/main/java/org/rcsb/cif/schema/DelegatingColumn.java

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
public class DelegatingColumn<T> implements Column<T> {
1111
protected final Column<?> delegate;
1212
protected final Class<T> type;
13+
protected T array;
1314

1415
@SuppressWarnings("unchecked")
1516
public DelegatingColumn(Column<?> delegate) {
@@ -44,16 +45,12 @@ public ValueKind getValueKind(int row) {
4445
@SuppressWarnings("unchecked")
4546
@Override
4647
public T getArray() {
47-
Object array = delegate.getArray();
48-
// matches expectation
49-
if (type.isInstance(array)) {
50-
return (T) array;
51-
}
52-
// empty column
5348
if (array == null) {
54-
return null;
49+
Object raw = delegate.getArray();
50+
// 1st check: type matches expectation -> use as is; 2nd check: non-empty column of another type -> convert via forceType; otherwise the column is empty -> null
51+
array = (type.isInstance(raw)) ? (T) raw : (raw != null) ? forceType(raw) : null;
5552
}
56-
return forceType(array);
53+
return array;
5754
}
5855

5956
@SuppressWarnings("unchecked")

src/test/java/org/rcsb/cif/ReaderTest.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import org.junit.jupiter.api.Test;
44
import org.rcsb.cif.model.CifFile;
5+
import org.rcsb.cif.model.FloatColumn;
56
import org.rcsb.cif.model.IntColumn;
67
import org.rcsb.cif.schema.StandardSchemata;
78
import org.rcsb.cif.schema.mm.AtomSite;
@@ -10,6 +11,8 @@
1011

1112
import java.io.IOException;
1213
import java.io.InputStream;
14+
import java.util.ArrayList;
15+
import java.util.Comparator;
1316
import java.util.List;
1417
import java.util.Map;
1518
import java.util.OptionalDouble;
@@ -125,4 +128,25 @@ void whenReadingStringWithEmptyQuotation_thenValueAvailable() throws IOException
125128
String gene = cifFile.getFirstBlock().getCategory("ma_target_ref_db_details").getColumn("gene_name").getStringData(0);
126129
assertEquals("''cytochrome P450", gene, "Gene name with additional quotes not parsed correctly");
127130
}
131+
132+
@Test
133+
void whenReadingBinaryColumnWithTypeMismatch_thenPerformanceNotDegrading() throws IOException {
134+
String id = "9a2f";
135+
InputStream inputStream = TestHelper.getInputStream("bcif/" + id + ".bcif.gz");
136+
AtomSite atomSite = CifIO.readFromInputStream(inputStream).as(StandardSchemata.MMCIF).getFirstBlock().getAtomSite();
137+
138+
// schema type is float but data is internally encoded as int -- assert that this doesn't degrade performance when accessing values one-by-one
139+
FloatColumn bIsoOrEquiv = atomSite.getBIsoOrEquiv();
140+
141+
long start = System.nanoTime();
142+
double sum = 0.0;
143+
for (int i = 0; i < bIsoOrEquiv.getRowCount(); i++) {
144+
// invokes `getArray()`, which used to convert the internal array representation again and again if types diverged
145+
sum += bIsoOrEquiv.get(i);
146+
}
147+
assertEquals(0.0, sum);
148+
long end = System.nanoTime();
149+
long delta_ms = (end - start) / 1_000_000;
150+
assertTrue(delta_ms < 250, "Access to took " + delta_ms + " ms and we deem that too slow");
151+
}
128152
}
2.42 MB
Binary file not shown.

0 commit comments

Comments
 (0)