Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions modules/nf-commons/src/main/nextflow/util/HashBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
Expand All @@ -27,6 +29,7 @@
import java.nio.file.attribute.BasicFileAttributes;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
Expand All @@ -47,6 +50,7 @@
import nextflow.extension.FilesEx;
import nextflow.io.SerializableMarker;
import nextflow.script.types.Bag;
import nextflow.script.types.Record;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static nextflow.Const.DEFAULT_ROOT;
Expand Down Expand Up @@ -152,6 +156,9 @@ else if( value instanceof CacheFunnel )
else if( value instanceof Map )
hashUnorderedCollection(hasher, ((Map) value).entrySet(), mode);

else if( value instanceof Record )
hashUnorderedCollection(hasher, recordToMap((Record) value).entrySet(), mode);
Comment on lines +159 to +160

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Every record instance is a RecordMap, so it should be caught by the Map case above


else if( value instanceof Map.Entry ) {
Map.Entry entry = (Map.Entry)value;
with(entry.getKey());
Expand Down Expand Up @@ -461,6 +468,40 @@ static HashCode hashContent( Path file, HashFunction function ) {
return hashFileContent(hasher, file).hash();
}

/**
* Extract a typed record's instance fields into a {@link Map} so the record can be
* hashed as an unordered collection of (field name, field value) entries — the same
* representation used for {@link RecordMap}.
*
* <p>This ensures user-defined records with identical field values produce identical
* hashes across runs, regardless of JVM identity hash codes. Records built via the
* stdlib {@code record(...)} helper (which return {@link RecordMap}) and records
* instantiated from a typed {@code record …} declaration hash to the same value
* when their fields match.
*
* @param record A {@link Record} instance whose public, non-static fields will be hashed.
* @return A map from field name to field value.
*/
static private Map<String,Object> recordToMap(Record record) {
if( record instanceof Map ) {
// already handled by the Map branch; defensive guard
return (Map<String,Object>) record;
}
var result = new LinkedHashMap<String,Object>();
for( Field field : record.getClass().getFields() ) {
int mods = field.getModifiers();
if( Modifier.isStatic(mods) || field.isSynthetic() )
continue;
try {
result.put(field.getName(), field.get(record));
}
catch (IllegalAccessException e) {
throw new IllegalStateException("Unable to read record field: " + field.getName(), e);
}
}
return result;
}

static private Hasher hashUnorderedCollection(Hasher hasher, Collection collection, HashMode mode) {
byte[] resultBytes = new byte[HASH_BYTES];
for (Object item : collection) {
Expand Down
91 changes: 91 additions & 0 deletions modules/nf-commons/src/test/nextflow/util/HashBuilderTest.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,28 @@ import java.nio.file.Paths
import com.google.common.hash.Hashing
import nextflow.Global
import nextflow.Session
import nextflow.script.types.Record
import org.apache.commons.codec.digest.DigestUtils
import spock.lang.Specification
import test.TestHelper

class SampleRecord implements Record {
public String sample
public Integer count
SampleRecord(String sample, Integer count) {
this.sample = sample
this.count = count
}
}

class NestedRecord implements Record {
public String id
public SampleRecord inner
NestedRecord(String id, SampleRecord inner) {
this.id = id
this.inner = inner
}
}
/**
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
Expand Down Expand Up @@ -154,6 +173,78 @@ class HashBuilderTest extends Specification {
hash1.hash() == hash2.hash()
}

def 'should hash typed record by field values, independent of JVM identity'() {
given:
def r1 = new SampleRecord('alpha', 1)
def r2 = new SampleRecord('alpha', 1)
def r3 = new SampleRecord('beta', 1)
def r4 = new SampleRecord('alpha', 2)
Comment on lines +178 to +181

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Records cannot be created via constructor. But it may be that this happens to work and is only disallowed by the type checker


expect: 'records with same field values hash to the same value'
CacheHelper.hasher(r1).hash() == CacheHelper.hasher(r2).hash()

and: 'records with different field values hash differently'
CacheHelper.hasher(r1).hash() != CacheHelper.hasher(r3).hash()
CacheHelper.hasher(r1).hash() != CacheHelper.hasher(r4).hash()
}

def 'typed record and equivalent RecordMap should hash to the same value'() {
given:
def typed = new SampleRecord('alpha', 1)
def asMap = new RecordMap([sample: 'alpha', count: 1])

expect:
CacheHelper.hasher(typed).hash() == CacheHelper.hasher(asMap).hash()
}

def 'record built with the record(...) built-in should hash the same as the typed constructor'() {
given: 'a record built with the stdlib record(...) helper'
def builtin = nextflow.Nextflow.record(sample: 'alpha', count: 1)

and: 'an equivalent record built with the typed-record constructor'
def typed = new SampleRecord('alpha', 1)

expect:
CacheHelper.hasher(builtin).hash() == CacheHelper.hasher(typed).hash()
}

def 'record(...) built-in hash should not depend on the order of named arguments'() {
given:
def r1 = nextflow.Nextflow.record(sample: 'alpha', count: 1)
def r2 = nextflow.Nextflow.record(count: 1, sample: 'alpha')

expect:
CacheHelper.hasher(r1).hash() == CacheHelper.hasher(r2).hash()
}

def 'nested typed records should hash by value'() {
given:
def n1 = new NestedRecord('x', new SampleRecord('alpha', 1))
def n2 = new NestedRecord('x', new SampleRecord('alpha', 1))
def n3 = new NestedRecord('x', new SampleRecord('beta', 1))
def n4 = new NestedRecord('y', new SampleRecord('alpha', 1))

expect: 'same outer and same inner field values hash equally'
CacheHelper.hasher(n1).hash() == CacheHelper.hasher(n2).hash()

and: 'a change to the inner record invalidates the outer hash'
CacheHelper.hasher(n1).hash() != CacheHelper.hasher(n3).hash()

and: 'a change to the outer field invalidates the hash'
CacheHelper.hasher(n1).hash() != CacheHelper.hasher(n4).hash()
}

def 'nested typed record and nested RecordMap should hash equally'() {
given:
def typed = new NestedRecord('x', new SampleRecord('alpha', 1))
def asMap = nextflow.Nextflow.record(
id: 'x',
inner: nextflow.Nextflow.record(sample: 'alpha', count: 1) )

expect:
CacheHelper.hasher(typed).hash() == CacheHelper.hasher(asMap).hash()
}

def 'directories with same content but different structure should yield different hashes'() {
given:
def folder = TestHelper.createInMemTempDir()
Expand Down
Loading