Skip to content

Commit 2bb2f50

Browse files
authored
adding missing registrations in kryo (#4451)
* adding missing registrations in kryo some classes were missing registration in kryo which causes less efficient serialization * adding registrations for a number of classes that MarkDuplicatesSpark needs that weren't registered yet notably, BAMRecord wasn't registered to use the correct serializer which could cause major inefficiencies it's not clear what circumstances we're serializing BAMRecord instead of SAMRecordToGATKReadAdapter so how much this will help is not obvious * register everything twice on the off chance that we conflict in a bad way with ADAM's registration, register once before adam to set the initial registrations, and one after to override anything where we conflict
1 parent 8d929ed commit 2bb2f50

File tree

1 file changed

+37
-24
lines changed

1 file changed

+37
-24
lines changed

src/main/java/org/broadinstitute/hellbender/engine/spark/GATKRegistrator.java

Lines changed: 37 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import com.esotericsoftware.kryo.Kryo;
44
import com.esotericsoftware.kryo.serializers.FieldSerializer;
55
import de.javakaffee.kryoserializers.UnmodifiableCollectionsSerializer;
6-
import htsjdk.samtools.SAMRecord;
6+
import htsjdk.samtools.*;
77
import org.apache.spark.serializer.KryoRegistrator;
88
import org.bdgenomics.adam.serialization.ADAMKryoRegistrator;
99
import org.broadinstitute.hellbender.utils.read.SAMRecordToGATKReadAdapter;
@@ -17,35 +17,15 @@
1717
*/
1818
public class GATKRegistrator implements KryoRegistrator {
1919

20-
private ADAMKryoRegistrator ADAMregistrator;
20+
private final ADAMKryoRegistrator ADAMregistrator = new ADAMKryoRegistrator();
2121

22-
public GATKRegistrator() {
23-
this.ADAMregistrator = new ADAMKryoRegistrator();
24-
}
22+
public GATKRegistrator() {}
2523

26-
@SuppressWarnings({"unchecked", "rawtypes"})
2724
@Override
2825
public void registerClasses(Kryo kryo) {
2926

30-
//relatively inefficient serialization of Collections created with Collections.nCopies(), without this
31-
//any Collection created with Collections.nCopies fails to serialize at run time
32-
kryo.register(Collections.nCopies(2, "").getClass(), new FieldSerializer<>(kryo, Collections.nCopies(2, "").getClass()));
33-
34-
// htsjdk.variant.variantcontext.CommonInfo has a Map<String, Object> that defaults to
35-
// a Collections.unmodifiableMap. This can't be handled by the version of kryo used in Spark, it's fixed
36-
// in newer versions (3.0.x), but we can't use those because of incompatibility with Spark. We just include the
37-
// fix here.
38-
// We are tracking this issue with (#874)
39-
kryo.register(Collections.unmodifiableMap(Collections.EMPTY_MAP).getClass(), new UnmodifiableCollectionsSerializer());
40-
41-
kryo.register(Collections.unmodifiableList(Collections.EMPTY_LIST).getClass(), new UnmodifiableCollectionsSerializer());
42-
43-
kryo.register(SAMRecordToGATKReadAdapter.class, new SAMRecordToGATKReadAdapterSerializer());
27+
registerGATKClasses(kryo);
4428

45-
kryo.register(SAMRecord.class, new SAMRecordSerializer());
46-
47-
//register to avoid writing the full name of this class over and over
48-
kryo.register(PairedEnds.class, new FieldSerializer<>(kryo, PairedEnds.class));
4929

5030
// register the ADAM data types using Avro serialization, including:
5131
// AlignmentRecord
@@ -67,5 +47,38 @@ public void registerClasses(Kryo kryo) {
6747
// TargetSet
6848
// ZippedTargetSet
6949
ADAMregistrator.registerClasses(kryo);
50+
51+
//do this before and after ADAM to try and force our registrations to win out
52+
registerGATKClasses(kryo);
53+
}
54+
55+
@SuppressWarnings({"unchecked", "rawtypes"})
56+
private void registerGATKClasses(Kryo kryo) {
57+
//relatively inefficient serialization of Collections created with Collections.nCopies(), without this
58+
//any Collection created with Collections.nCopies fails to serialize at run time
59+
kryo.register(Collections.nCopies(2, "").getClass(), new FieldSerializer<>(kryo, Collections.nCopies(2, "").getClass()));
60+
61+
// htsjdk.variant.variantcontext.CommonInfo has a Map<String, Object> that defaults to
62+
// a Collections.unmodifiableMap. This can't be handled by the version of kryo used in Spark, it's fixed
63+
// in newer versions (3.0.x), but we can't use those because of incompatibility with Spark. We just include the
64+
// fix here.
65+
// We are tracking this issue with (#874)
66+
kryo.register(Collections.unmodifiableMap(Collections.EMPTY_MAP).getClass(), new UnmodifiableCollectionsSerializer());
67+
68+
kryo.register(Collections.unmodifiableList(Collections.EMPTY_LIST).getClass(), new UnmodifiableCollectionsSerializer());
69+
70+
kryo.register(SAMRecordToGATKReadAdapter.class, new SAMRecordToGATKReadAdapterSerializer());
71+
72+
kryo.register(SAMRecord.class, new SAMRecordSerializer());
73+
kryo.register(BAMRecord.class, new SAMRecordSerializer());
74+
75+
kryo.register(SAMFileHeader.class);
76+
kryo.register(SAMFileHeader.GroupOrder.class);
77+
kryo.register(SAMFileHeader.SortOrder.class);
78+
kryo.register(SAMProgramRecord.class);
79+
kryo.register(SAMReadGroupRecord.class);
80+
81+
//register to avoid writing the full name of this class over and over
82+
kryo.register(PairedEnds.class, new FieldSerializer<>(kryo, PairedEnds.class));
7083
}
7184
}

0 commit comments

Comments
 (0)