diff --git a/modules/nf-commons/src/main/nextflow/util/HashBuilder.java b/modules/nf-commons/src/main/nextflow/util/HashBuilder.java index 8c14e282cb..f42046558f 100644 --- a/modules/nf-commons/src/main/nextflow/util/HashBuilder.java +++ b/modules/nf-commons/src/main/nextflow/util/HashBuilder.java @@ -19,6 +19,8 @@ import java.io.File; import java.io.IOException; import java.io.OutputStream; +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; import java.nio.file.FileVisitResult; import java.nio.file.Files; import java.nio.file.Path; @@ -27,6 +29,7 @@ import java.nio.file.attribute.BasicFileAttributes; import java.util.Collection; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.Map; import java.util.Set; import java.util.UUID; @@ -47,6 +50,7 @@ import nextflow.extension.FilesEx; import nextflow.io.SerializableMarker; import nextflow.script.types.Bag; +import nextflow.script.types.Record; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import static nextflow.Const.DEFAULT_ROOT; @@ -152,6 +156,9 @@ else if( value instanceof CacheFunnel ) else if( value instanceof Map ) hashUnorderedCollection(hasher, ((Map) value).entrySet(), mode); + else if( value instanceof Record ) + hashUnorderedCollection(hasher, recordToMap((Record) value).entrySet(), mode); + else if( value instanceof Map.Entry ) { Map.Entry entry = (Map.Entry)value; with(entry.getKey()); @@ -461,6 +468,40 @@ static HashCode hashContent( Path file, HashFunction function ) { return hashFileContent(hasher, file).hash(); } + /** + * Extract a typed record's instance fields into a {@link Map} so the record can be + * hashed as an unordered collection of (field name, field value) entries — the same + * representation used for {@link RecordMap}. + * + *

This ensures user-defined records with identical field values produce identical + * hashes across runs, regardless of JVM identity hash codes. Records built via the + * stdlib {@code record(...)} helper (which return {@link RecordMap}) and records + * instantiated from a typed {@code record …} declaration hash to the same value + * when their fields match. + * + * @param record A {@link Record} instance whose public, non-static fields will be hashed. + * @return A map from field name to field value. + */ + static private Map recordToMap(Record record) { + if( record instanceof Map ) { + // already handled by the Map branch; defensive guard + return (Map) record; + } + var result = new LinkedHashMap(); + for( Field field : record.getClass().getFields() ) { + int mods = field.getModifiers(); + if( Modifier.isStatic(mods) || field.isSynthetic() ) + continue; + try { + result.put(field.getName(), field.get(record)); + } + catch (IllegalAccessException e) { + throw new IllegalStateException("Unable to read record field: " + field.getName(), e); + } + } + return result; + } + static private Hasher hashUnorderedCollection(Hasher hasher, Collection collection, HashMode mode) { byte[] resultBytes = new byte[HASH_BYTES]; for (Object item : collection) { diff --git a/modules/nf-commons/src/test/nextflow/util/HashBuilderTest.groovy b/modules/nf-commons/src/test/nextflow/util/HashBuilderTest.groovy index 7b156e9a9c..4bed240ad6 100644 --- a/modules/nf-commons/src/test/nextflow/util/HashBuilderTest.groovy +++ b/modules/nf-commons/src/test/nextflow/util/HashBuilderTest.groovy @@ -22,9 +22,28 @@ import java.nio.file.Paths import com.google.common.hash.Hashing import nextflow.Global import nextflow.Session +import nextflow.script.types.Record import org.apache.commons.codec.digest.DigestUtils import spock.lang.Specification import test.TestHelper + +class SampleRecord implements Record { + public String sample + public Integer count + SampleRecord(String sample, Integer count) { + this.sample = sample + this.count = count + } +} + +class NestedRecord implements Record { + public String id + public SampleRecord inner + NestedRecord(String id, SampleRecord inner) { + this.id = id + this.inner = inner + } +} /** * * @author Paolo Di Tommaso @@ -154,6 +173,78 @@ class HashBuilderTest extends Specification { hash1.hash() == hash2.hash() } + def 'should hash typed record by field values, independent of JVM identity'() { + given: + def r1 = new SampleRecord('alpha', 1) + def r2 = new SampleRecord('alpha', 1) + def r3 = new SampleRecord('beta', 1) + def r4 = new SampleRecord('alpha', 2) + + expect: 'records with same field values hash to the same value' + CacheHelper.hasher(r1).hash() == CacheHelper.hasher(r2).hash() + + and: 'records with different field values hash differently' + CacheHelper.hasher(r1).hash() != CacheHelper.hasher(r3).hash() + CacheHelper.hasher(r1).hash() != CacheHelper.hasher(r4).hash() + } + + def 'typed record and equivalent RecordMap should hash to the same value'() { + given: + def typed = new SampleRecord('alpha', 1) + def asMap = new RecordMap([sample: 'alpha', count: 1]) + + expect: + CacheHelper.hasher(typed).hash() == CacheHelper.hasher(asMap).hash() + } + + def 'record built with the record(...) built-in should hash the same as the typed constructor'() { + given: 'a record built with the stdlib record(...) helper' + def builtin = nextflow.Nextflow.record(sample: 'alpha', count: 1) + + and: 'an equivalent record built with the typed-record constructor' + def typed = new SampleRecord('alpha', 1) + + expect: + CacheHelper.hasher(builtin).hash() == CacheHelper.hasher(typed).hash() + } + + def 'record(...) built-in hash should not depend on the order of named arguments'() { + given: + def r1 = nextflow.Nextflow.record(sample: 'alpha', count: 1) + def r2 = nextflow.Nextflow.record(count: 1, sample: 'alpha') + + expect: + CacheHelper.hasher(r1).hash() == CacheHelper.hasher(r2).hash() + } + + def 'nested typed records should hash by value'() { + given: + def n1 = new NestedRecord('x', new SampleRecord('alpha', 1)) + def n2 = new NestedRecord('x', new SampleRecord('alpha', 1)) + def n3 = new NestedRecord('x', new SampleRecord('beta', 1)) + def n4 = new NestedRecord('y', new SampleRecord('alpha', 1)) + + expect: 'same outer and same inner field values hash equally' + CacheHelper.hasher(n1).hash() == CacheHelper.hasher(n2).hash() + + and: 'a change to the inner record invalidates the outer hash' + CacheHelper.hasher(n1).hash() != CacheHelper.hasher(n3).hash() + + and: 'a change to the outer field invalidates the hash' + CacheHelper.hasher(n1).hash() != CacheHelper.hasher(n4).hash() + } + + def 'nested typed record and nested RecordMap should hash equally'() { + given: + def typed = new NestedRecord('x', new SampleRecord('alpha', 1)) + def asMap = nextflow.Nextflow.record( + id: 'x', + inner: nextflow.Nextflow.record(sample: 'alpha', count: 1) ) + + expect: + CacheHelper.hasher(typed).hash() == CacheHelper.hasher(asMap).hash() + } + def 'directories with same content but different structure should yield different hashes'() { given: def folder = TestHelper.createInMemTempDir()