-
Notifications
You must be signed in to change notification settings - Fork 31
Expand file tree
/
Copy pathSamplesheetConverter.groovy
More file actions
303 lines (253 loc) · 11.6 KB
/
SamplesheetConverter.groovy
File metadata and controls
303 lines (253 loc) · 11.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
package nextflow.validation.samplesheet
import groovy.json.JsonSlurper
import groovy.util.logging.Slf4j
import java.nio.file.Path
import org.json.JSONArray
import nextflow.Nextflow
import static nextflow.validation.utils.Colors.getLogColors
import static nextflow.validation.utils.Files.fileToJson
import static nextflow.validation.utils.Files.fileToObject
import static nextflow.validation.utils.Files.getFileType
import static nextflow.validation.utils.Common.findDeep
import static nextflow.validation.utils.Common.hasDeepKey
import nextflow.validation.config.ValidationConfig
import nextflow.validation.exceptions.SchemaValidationException
import nextflow.validation.utils.WorkbookConverter
import nextflow.validation.validators.JsonSchemaValidator
import nextflow.validation.validators.ValidationResult
/**
* @author : mirpedrol <mirp.julia@gmail.com>
* @author : nvnieuwk <nicolas.vannieuwkerke@ugent.be>
* @author : awgymer
*/
@Slf4j
class SamplesheetConverter {
// Plugin configuration; read in this class for the log colours and the unrecognised-headers logger
private ValidationConfig config

// Cleared by validateAndConvertToList before converting; not read anywhere in the visible code
private List<Map> rows = []

// Meta map being collected for the samplesheet entry that is currently converted
private Map meta = [:]

/**
 * Create a converter bound to the given validation configuration.
 *
 * @param config the configuration stored on this instance
 */
SamplesheetConverter(ValidationConfig config) {
    this.config = config
}

/** @return the meta map collected so far for the current entry */
private Map getMeta() {
    return this.meta
}

/**
 * Replace the meta map with a fresh empty map.
 *
 * @return the new, empty meta map
 */
private Map resetMeta() {
    this.meta = [:]
    return this.meta
}

/**
 * Merge additional entries into the meta map. A new map is created; the
 * previous map instance is not modified.
 *
 * @param newEntries the entries to add
 * @return the merged meta map
 */
private addMeta(Map newEntries) {
    def Map merged = this.meta + newEntries
    this.meta = merged
    return merged
}

/** @return true when at least one meta value has been collected */
private Boolean isMeta() {
    return !this.meta.isEmpty()
}
// Samplesheet headers that are not defined in the schema; may contain duplicates
private List unrecognisedHeaders = []

/**
 * Remember a samplesheet header that has no matching schema property.
 *
 * @param header the header name to record
 */
private addUnrecognisedHeader (String header) {
    return this.unrecognisedHeaders.add(header)
}

/**
 * Report every distinct recorded header through the configured
 * unrecognised-headers logger. Nothing is logged when no header was recorded.
 *
 * @param fileName the samplesheet name used in the message
 */
private logUnrecognisedHeaders(String fileName) {
    // De-duplicate: the same header is recorded once per samplesheet row
    def Set distinctHeaders = this.unrecognisedHeaders as Set
    if (distinctHeaders.isEmpty()) {
        return
    }
    def String bulletList = distinctHeaders.collect { "\t- ${it}" }.join("\n")
    def String msg = "Found the following unidentified headers in ${fileName}:\n${bulletList}\n" as String
    config.logging.unrecognisedHeaders.log(msg)
}
/**
 * Validate a samplesheet against a JSON schema and convert it to a list of entries.
 *
 * Each entry is the result of formatEntry for one samplesheet row. When the
 * schema marks properties as meta values, the collected meta map is placed as
 * the first element of that entry.
 *
 * @param samplesheetFile the samplesheet to read
 * @param schemaFile the JSON schema; 'items' must be a top level keyword and 'properties' must not
 * @param options only forwarded to the WorkbookConverter when the samplesheet is an Excel file
 * @return the converted entries, one per samplesheet row
 * @throws SchemaValidationException when either file does not exist, when the
 *         schema has the wrong top level keywords, or when validation reports errors
 */
public List validateAndConvertToList(
    Path samplesheetFile,
    Path schemaFile,
    Map options
) {
    def colors = getLogColors(config.monochromeLogs)

    // Some checks before validating
    if(!schemaFile.exists()) {
        def msg = "${colors.red}JSON schema file ${schemaFile.toString()} does not exist\n${colors.reset}\n"
        throw new SchemaValidationException(msg)
    }

    def Map schemaMap = new JsonSlurper().parseText(schemaFile.text) as Map
    def List<String> schemaKeys = schemaMap.keySet() as List<String>
    if(schemaKeys.contains("properties") || !schemaKeys.contains("items")) {
        def msg = "${colors.red}The schema for '${samplesheetFile.toString()}' (${schemaFile.toString()}) is not valid. Please make sure that 'items' is the top level keyword and not 'properties'\n${colors.reset}\n"
        throw new SchemaValidationException(msg)
    }

    if(!samplesheetFile.exists()) {
        def msg = "${colors.red}Samplesheet file ${samplesheetFile.toString()} does not exist\n${colors.reset}\n"
        throw new SchemaValidationException(msg)
    }

    // Check if this is an Excel file and process accordingly
    def String fileType = getFileType(samplesheetFile)
    def JSONArray samplesheet
    def List samplesheetList

    if (fileType in ['xlsx', 'xlsm', 'xlsb', 'xls']) {
        // Process Excel file using WorkbookConverter
        def WorkbookConverter workbookConverter = new WorkbookConverter(config)
        samplesheetList = workbookConverter.convertToList(samplesheetFile, options) as List

        // Convert to JSON for validation - same as other formats
        def jsonGenerator = new groovy.json.JsonGenerator.Options()
            .excludeNulls()
            .build()
        samplesheet = new JSONArray(jsonGenerator.toJson(samplesheetList))
    } else {
        // Process other file formats: once as JSON for validation, once as plain objects for conversion
        samplesheet = fileToJson(samplesheetFile, schemaFile) as JSONArray
        samplesheetList = fileToObject(samplesheetFile, schemaFile) as List
    }

    // Validate
    final validator = new JsonSchemaValidator(config)
    def ValidationResult validationResult = validator.validate(samplesheet, schemaFile.toString())
    def validationErrors = validationResult.getErrors('field')
    if (validationErrors) {
        def msg = "${colors.red}The following errors have been detected in ${samplesheetFile.toString()}:\n\n" + validationErrors.join('\n').trim() + "\n${colors.reset}\n"
        log.error("Validation of samplesheet failed!")
        throw new SchemaValidationException(msg, validationErrors)
    }

    // Convert the parsed rows to the channel format.
    // Per-call state is cleared first: without resetting the unrecognised headers,
    // a converter that is used for more than one samplesheet would report headers
    // of an earlier samplesheet under the name of the current one.
    this.rows = []
    this.unrecognisedHeaders = []

    def List channelFormat = samplesheetList.collect { entry ->
        // The meta map is collected per row, so start every row with an empty one
        resetMeta()
        def Object result = formatEntry(entry, schemaMap["items"] as Map)
        if(isMeta()) {
            if(result instanceof List) {
                result.add(0,getMeta())
            } else {
                result = [getMeta(), result]
            }
        }
        return result
    }

    logUnrecognisedHeaders(samplesheetFile.toString())
    return channelFormat
}
/**
 * Convert one samplesheet value into the form that is added to the output
 * channel, guided by the schema of that value.
 *
 * Maps are converted to a list with one element per schema property, lists
 * are converted element by element, and any other value is handed to
 * processValue. Properties that carry a "meta" keyword in the schema are
 * stored in the meta map instead of being added to the returned list.
 *
 * @param input the value to convert; null is replaced by the schema default or an empty list
 * @param schema the schema describing the value
 * @param headerPrefix prefix used to build the names of unrecognised headers
 * @return the converted value
 */
private Object formatEntry(Object input, Map schema, String headerPrefix = "") {
    // Add default values for missing entries
    if (input == null) {
        input = hasDeepKey(schema, "default") ? findDeep(schema, "default") : []
    }

    if (input instanceof Map) {
        def Map properties = findDeep(schema, "properties") as Map
        def List converted = []

        // Record samplesheet keys that have not been defined in the schema
        (input.keySet() - properties.keySet()).each { unknownKey ->
            addUnrecognisedHeader("${headerPrefix}${unknownKey}" as String)
        }

        // Iterate over the schema properties (not the input keys) to maintain the schema order
        properties.each { propertyEntry ->
            def property = propertyEntry.key
            def propertySchema = propertyEntry.value
            def value = input[property]
            def String childPrefix = "${headerPrefix ?: ''}${property}."

            // "meta" may be a single name or a list of names; anything else means no meta
            def metaSpec = propertySchema["meta"]
            def List metaIds = []
            if (metaSpec instanceof List) {
                metaIds = metaSpec as List
            } else if (metaSpec instanceof String) {
                metaIds = [metaSpec]
            }

            if (metaIds) {
                // The value goes to the meta map under every listed name
                for (metaId in metaIds) {
                    meta["${metaId}"] = processMeta(value, propertySchema as Map, childPrefix)
                }
            } else {
                // Regular property: add the converted value to the row
                converted.add(formatEntry(value, propertySchema as Map, childPrefix))
            }
        }
        return converted
    }

    if (input instanceof List) {
        def List converted = []
        input.eachWithIndex { element, index ->
            // The element index becomes part of the header prefix
            def String elementPrefix = "${headerPrefix ?: ''}${index}."
            converted.add(formatEntry(element, findDeep(schema, "items") as Map, elementPrefix))
        }
        return converted
    }

    // Cast value to path type if needed and return the value
    return processValue(input, schema)
}
// Schema formats that mark a string as a path
private List validPathFormats = ["file-path", "path", "directory-path", "file-path-pattern"]
// Schema keywords whose sub-schemas are also inspected for string types
private List schemaOptions = ["anyOf", "oneOf", "allOf"]

/**
 * Process a value that is neither a map nor a list and turn it into a file
 * object when the schema says it is a path.
 *
 * The schema entry itself and the sub-schemas below anyOf/oneOf/allOf are
 * inspected. When all string types have a path format the value is converted.
 * When some string types have a path format and others do not, a simple guess
 * based on the value decides. In every other case the value is returned as is.
 *
 * @param value the value to process
 * @param schemaEntry the schema of the value, may be null
 * @return the result of Nextflow.file(value) for paths, otherwise the unchanged value
 */
private Object processValue(Object value, Map schemaEntry) {
    if (schemaEntry == null || !(value instanceof String)) {
        return value
    }

    // Sub-schemas without their own format inherit the format of the entry
    def String fallbackFormat = schemaEntry.format ?: ""

    // Collect the effective format of every schema that declares type "string"
    def List stringFormats = []
    if ((schemaEntry.type ?: "") == "string") {
        stringFormats.add(fallbackFormat)
    }
    schemaOptions.each { keyword ->
        schemaEntry[keyword]?.each { candidate ->
            if ((candidate["type"] ?: "") == "string") {
                stringFormats.add(candidate["format"] ?: fallbackFormat)
            }
        }
    }

    // A string type with a valid path format has been found
    def Boolean pathFormatSeen = stringFormats.any { validPathFormats.contains(it) }
    // A string type without a valid path format has been found
    def Boolean plainStringSeen = stringFormats.any { !validPathFormats.contains(it) }

    if (!pathFormatSeen) {
        return value
    }
    if (!plainStringSeen) {
        return Nextflow.file(value)
    }

    // Ambiguous: guess from the value itself. It is treated as a path when it
    // contains a / or when a dot is present in the last 7 characters
    // (possibly indicating an extension)
    def String tail = value.size() >= 7 ? value[-7..-1] : value
    if (value.contains("/") || tail.contains(".")) {
        return Nextflow.file(value)
    }
    return value
}
/**
 * Convert one samplesheet value into the form that is stored in the meta map,
 * guided by the schema of that value.
 *
 * Maps stay maps (one key per schema property), lists are converted element
 * by element, and any other value is handed to processValue.
 *
 * @param input the value to convert; null is replaced by the schema default or an empty list
 * @param schema the schema describing the value
 * @param headerPrefix prefix used to build the names of unrecognised headers
 * @return the converted value
 */
private Object processMeta(Object input, Map schema, String headerPrefix) {
    // Add default values for missing entries
    if (input == null) {
        input = hasDeepKey(schema, "default") ? findDeep(schema, "default") : []
    }

    if (input instanceof Map) {
        def Map properties = findDeep(schema, "properties") as Map
        def Map converted = [:]

        // Record samplesheet keys that have not been defined in the schema
        (input.keySet() - properties.keySet()).each { unknownKey ->
            addUnrecognisedHeader("${headerPrefix}${unknownKey}" as String)
        }

        // Iterate over the schema properties (not the input keys) to maintain the schema order
        properties.each { propertyEntry ->
            def property = propertyEntry.key
            def value = input[property]
            def String childPrefix = "${headerPrefix ?: ''}${property}."
            converted[property] = processMeta(value, propertyEntry.value as Map, childPrefix)
        }
        return converted
    }

    if (input instanceof List) {
        def List converted = []
        input.eachWithIndex { element, index ->
            // The element index becomes part of the header prefix
            def String elementPrefix = "${headerPrefix ?: ''}${index}."
            converted.add(processMeta(element, findDeep(schema, "items") as Map, elementPrefix))
        }
        return converted
    }

    // Cast value to path type if needed and return the value
    return processValue(input, schema)
}
}