From 9b8d9965003d597a40b5a4a7936878896ad8e50c Mon Sep 17 00:00:00 2001 From: John Chilton Date: Sat, 17 May 2025 09:32:46 -0400 Subject: [PATCH 01/16] Update rule builder to allow list:paired_or_unpaired creation. --- .../RuleBuilder/rule-definitions.js | 8 ++ .../src/components/RuleCollectionBuilder.vue | 48 +++++++++- lib/galaxy/managers/collections.py | 25 ++++- lib/galaxy/util/rules_dsl.py | 7 ++ lib/galaxy_test/api/test_tools.py | 3 + lib/galaxy_test/base/rules_test_data.py | 94 +++++++++++++++++++ lib/galaxy_test/selenium/test_tool_form.py | 6 ++ 7 files changed, 185 insertions(+), 6 deletions(-) diff --git a/client/src/components/RuleBuilder/rule-definitions.js b/client/src/components/RuleBuilder/rule-definitions.js index aec823b5a9bc..d0f0d0a0586c 100644 --- a/client/src/components/RuleBuilder/rule-definitions.js +++ b/client/src/components/RuleBuilder/rule-definitions.js @@ -814,6 +814,14 @@ const MAPPING_TARGETS = { ), importType: "collections", }, + paired_or_unpaired_identifier: { + label: _l("Optional Paired-end Indicator (Advanced)"), + columnHeader: _l("Optional Paired Indicator"), + help: _l( + "This should be set to '1', 'R1', 'forward', 'f', or 'F' to indicate forward reads, and '2', 'R2', 'reverse', 'r', or 'R' to indicate reverse reads. Elements with an unmatched '1' or 'forward' indicator will be 'unpaired' in the resulting list; alternatively, this column can be set to 'u' or 'unpaired' to force an element to be unpaired." + ), + importType: "collections", + }, collection_name: { label: _l("Collection Name"), help: _l( diff --git a/client/src/components/RuleCollectionBuilder.vue b/client/src/components/RuleCollectionBuilder.vue index c1ace0a568de..2e2005cb149f 100644 --- a/client/src/components/RuleCollectionBuilder.vue +++ b/client/src/components/RuleCollectionBuilder.vue @@ -1057,6 +1057,13 @@ export default { collectionType = "paired"; } } + if (this.mappingAsDict.paired_or_unpaired_identifier) { + if (collectionType) { + collectionType += ":paired_or_unpaired"; + } else { + collectionType = "paired_or_unpaired"; + } + } return collectionType; }, validName() { @@ -1556,6 +1563,9 @@ export default { if (this.mappingAsDict.paired_identifier) { identifierColumns.push(this.mappingAsDict.paired_identifier.columns[0]); } + if (this.mappingAsDict.paired_or_unpaired_identifier) { + identifierColumns.push(this.mappingAsDict.paired_or_unpaired_identifier.columns[0]); + } return identifierColumns; }, buildRequestElements(createDatasetDescription, createSubcollectionDescription, subElementProp) { @@ -1610,15 +1620,24 @@ export default { let identifier = String(rowData[identifierColumns[identifierColumnIndex]]); if (identifierColumnIndex + 1 == numIdentifierColumns) { // At correct final position in nested structure for this dataset.
- if (collectionTypeAtDepth === "paired") { + if (["paired", "paired_or_unpaired"].indexOf(collectionTypeAtDepth) > -1) { if (["f", "1", "r1", "forward"].indexOf(identifier.toLowerCase()) > -1) { identifier = "forward"; } else if (["r", "2", "r2", "reverse"].indexOf(identifier.toLowerCase()) > -1) { identifier = "reverse"; + } else if ( + collectionTypeAtDepth == "paired_or_unpaired" && + ["unpaired", "u"].indexOf(identifier.toLowerCase()) > -1 + ) { + identifier = "unpaired"; } else { this.state = "error"; - this.errorMessage = - "Unknown indicator of paired status encountered - only values of F, R, 1, 2, R1, R2, forward, or reverse are allowed."; + const allowedIndicators = ["F", "R", "1", "2", "R1", "R2", "forward", "reverse"]; + if (collectionTypeAtDepth == "paired_or_unpaired") { + allowedIndicators.push("unpaired", "u"); + } + this.errorMessage = `Unknown indicator (${identifier}) of paired status encountered - only values of (${allowedIndicators}) are allowed.`; return; } } @@ -1657,6 +1676,29 @@ export default { } } + // Recursively descend elements to handle "paired_or_unpaired" collections + const updateUnpairedIdentifiers = (elements) => { + for (const value of Object.values(elements)) { + if (typeof value !== "object" || value === null) { + continue; + } + if (value.src === "new_collection" && value.collection_type === "paired_or_unpaired") { + const subElements = value.elements; + if (subElements["forward"] && !subElements["reverse"]) { + subElements["unpaired"] = subElements["forward"]; + delete subElements["forward"]; + } + } + if (value.elements) { + updateUnpairedIdentifiers(value.elements); + } + } + }; + + if (collectionType.endsWith("paired_or_unpaired")) { + updateUnpairedIdentifiers(elements); + } + elementsByName[collectionName] = elements; } diff --git a/lib/galaxy/managers/collections.py b/lib/galaxy/managers/collections.py index 715f412f2445..dd23d8a527e4 100644 --- a/lib/galaxy/managers/collections.py +++ b/lib/galaxy/managers/collections.py @@ -744,14 +744,19 @@ def _build_elements_from_rule_data(self, collection_type_description, rule_set, if i + 1 == len(identifier_columns): # At correct final position in nested structure for this dataset. - if collection_type_at_depth.collection_type == "paired": + if collection_type_at_depth.collection_type in ["paired", "paired_or_unpaired"]: if identifier.lower() in ["f", "1", "r1", "forward"]: identifier = "forward" elif identifier.lower() in ["r", "2", "r2", "reverse"]: identifier = "reverse" + elif collection_type_at_depth.collection_type == "paired_or_unpaired" and identifier.lower() in ["u", "unpaired"]: + identifier = "unpaired" else: + allow_identifiers = ["F", "R", "1", "2", "R1", "R2", "forward", "reverse"] + if collection_type_at_depth.collection_type == "paired_or_unpaired": + allow_identifiers.extend(["unpaired", "u"]) raise Exception( - "Unknown indicator of paired status encountered - only values of F, R, 1, 2, R1, R2, forward, or reverse are allowed." + f"Unknown indicator of paired status encountered ({identifier}) - only values from ({allow_identifiers}) are allowed."
) tags = [] @@ -769,7 +774,6 @@ def _build_elements_from_rule_data(self, collection_type_description, rule_set, effective_dataset = handle_dataset(sources[data_index]["dataset"], tags) elements_at_depth[identifier] = effective_dataset - # log.info("Handling dataset [%s] with sources [%s], need to add tags [%s]" % (effective_dataset, sources, tags)) else: collection_type_at_depth = collection_type_at_depth.child_collection_type_description() found = False @@ -788,6 +792,21 @@ def _build_elements_from_rule_data(self, collection_type_description, rule_set, # Subsequent loop fills elements of newly created collection elements_at_depth = sub_collection["elements"] + # Recursively descend elements to handle "paired_or_unpaired" collections + def update_unpaired_identifiers(elements): + for value in elements.values(): + if not isinstance(value, dict): + continue + if value.get("src") == "new_collection" and value.get("collection_type") == "paired_or_unpaired": + sub_elements = value["elements"] + if "forward" in sub_elements and "reverse" not in sub_elements: + sub_elements["unpaired"] = sub_elements.pop("forward") + if "elements" in value: + update_unpaired_identifiers(value["elements"]) + + if collection_type_description.collection_type.endswith("paired_or_unpaired"): + update_unpaired_identifiers(elements) + return elements def __init_rule_data(self, elements, collection_type_description, parent_identifiers=None, parent_indices=None): diff --git a/lib/galaxy/util/rules_dsl.py b/lib/galaxy/util/rules_dsl.py index 7657eebebc3d..516ece43c94a 100644 --- a/lib/galaxy/util/rules_dsl.py +++ b/lib/galaxy/util/rules_dsl.py @@ -596,6 +596,8 @@ def identifier_columns(self): identifier_columns.extend(mapping_as_dict["list_identifiers"]["columns"]) if "paired_identifier" in mapping_as_dict: identifier_columns.append(mapping_as_dict["paired_identifier"]["columns"][0]) + if "paired_or_unpaired_identifier" in mapping_as_dict: + identifier_columns.append(mapping_as_dict["paired_or_unpaired_identifier"]["columns"][0]) return identifier_columns @@ -609,6 +611,11 @@ def collection_type(self): collection_type += ":paired" else: collection_type = "paired" + if "paired_or_unpaired_identifier" in mapping_as_dict: + if collection_type: + collection_type += ":paired_or_unpaired" + else: + collection_type = "paired_or_unpaired" return collection_type @property diff --git a/lib/galaxy_test/api/test_tools.py b/lib/galaxy_test/api/test_tools.py index 2b5fbafe66b0..40e289093d7b 100644 --- a/lib/galaxy_test/api/test_tools.py +++ b/lib/galaxy_test/api/test_tools.py @@ -883,6 +883,9 @@ def test_apply_rules_5(self): def test_apply_rules_6(self): self._apply_rules_and_check(rules_test_data.EXAMPLE_6) + def test_apply_rules_create_paired_or_unpaired_list(self): + self._apply_rules_and_check(rules_test_data.EXAMPLE_CREATE_PAIRED_OR_UNPAIRED_COLLECTION) + def test_apply_rules_flatten_with_indices(self): self._apply_rules_and_check(rules_test_data.EXAMPLE_FLATTEN_USING_INDICES) diff --git a/lib/galaxy_test/base/rules_test_data.py b/lib/galaxy_test/base/rules_test_data.py index 3eb7cea1f09d..5f2c445980af 100644 --- a/lib/galaxy_test/base/rules_test_data.py +++ b/lib/galaxy_test/base/rules_test_data.py @@ -348,6 +348,100 @@ def check_example_flatten_with_indices(hdca, dataset_populator): } +def check_example_create_paired_or_unpaired_with_unmatched_forward_becoming_unpaired(hdca, dataset_populator): + assert hdca["collection_type"] == "list:paired_or_unpaired" + assert hdca["element_count"] == 4 + sample1_el = hdca["elements"][0] + 
assert "object" in sample1_el, hdca + assert "element_identifier" in sample1_el + assert sample1_el["element_identifier"] == "sample1", hdca + child_collection_level = sample1_el["object"] + assert child_collection_level["collection_type"] == "paired_or_unpaired" + assert child_collection_level["elements"][0]["element_identifier"] == "forward", hdca + + sample2_el = hdca["elements"][1] + assert "object" in sample2_el, hdca + assert "element_identifier" in sample2_el + assert sample2_el["element_identifier"] == "sample2", hdca + child_collection_level = sample2_el["object"] + assert child_collection_level["collection_type"] == "paired_or_unpaired" + assert child_collection_level["elements"][0]["element_identifier"] == "unpaired", hdca + + sample3_el = hdca["elements"][2] + assert "object" in sample3_el, hdca + assert "element_identifier" in sample3_el + assert sample3_el["element_identifier"] == "sample3", hdca + child_collection_level = sample3_el["object"] + assert child_collection_level["collection_type"] == "paired_or_unpaired" + assert child_collection_level["elements"][0]["element_identifier"] == "unpaired", hdca + + sample4_el = hdca["elements"][2] + assert "object" in sample4_el, hdca + assert "element_identifier" in sample4_el + assert sample4_el["element_identifier"] == "sample3", hdca + child_collection_level = sample4_el["object"] + assert child_collection_level["collection_type"] == "paired_or_unpaired" + assert child_collection_level["elements"][0]["element_identifier"] == "unpaired", hdca + + +EXAMPLE_CREATE_PAIRED_OR_UNPAIRED_COLLECTION = { + "rules": { + "rules": [ + { + "type": "add_column_metadata", + "value": "identifier0", + }, + { + "type": "add_column_metadata", + "value": "identifier1", + }, + ], + "mapping": [ + { + "type": "list_identifiers", + "columns": [0], + }, + { + "type": "paired_or_unpaired_identifier", + "columns": [1], + }, + ], + }, + "test_data": { + "type": "list:list", + "elements": [ + { + "identifier": "sample1", + "elements": [ + {"identifier": "forward", "class": "File", "contents": "TestData123forward"}, + {"identifier": "reverse", "class": "File", "contents": "TestData123reverse"}, + ], + }, + { + "identifier": "sample2", + "elements": [ + {"identifier": "unpaired", "class": "File", "contents": "TestData123unpaired"}, + ], + }, + { + "identifier": "sample3", + "elements": [ + {"identifier": "u", "class": "File", "contents": "TestData123unpaired-2"}, + ], + }, + { + "identifier": "sample4", + "elements": [ + {"identifier": "forward", "class": "File", "contents": "TestData123unpaired-3"}, + ], + }, + ], + }, + "check": check_example_create_paired_or_unpaired_with_unmatched_forward_becoming_unpaired, + "output_hid": 12, +} + + def check_example_flatten_paired_or_unpaired(hdca, dataset_populator): assert hdca["collection_type"] == "list" assert hdca["element_count"] == 3 diff --git a/lib/galaxy_test/selenium/test_tool_form.py b/lib/galaxy_test/selenium/test_tool_form.py index 79a7f0b3cae1..4d49d09523f8 100644 --- a/lib/galaxy_test/selenium/test_tool_form.py +++ b/lib/galaxy_test/selenium/test_tool_form.py @@ -392,6 +392,12 @@ def test_run_apply_rules_paired_unpaired_flatten(self): self._apply_rules_and_check(rules_test_data.EXAMPLE_FLATTEN_PAIRED_OR_UNPAIRED) self.screenshot("tool_apply_rules_example_flatten_paired_unpaired_final") + @selenium_test + @managed_history + def test_run_apply_rules_create_paired_or_unpaired_list(self): + self._apply_rules_and_check(rules_test_data.EXAMPLE_CREATE_PAIRED_OR_UNPAIRED_COLLECTION) + 
self.screenshot("tool_apply_rules_example_flatten_paired_unpaired_final") + @selenium_test def test_run_apply_rules_flatten_with_indices(self): self._apply_rules_and_check(rules_test_data.EXAMPLE_FLATTEN_USING_INDICES) From 0cfb588aaf7d5345f08106a933848753e0443341 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Sat, 17 May 2025 12:58:23 -0400 Subject: [PATCH 02/16] Test case for rule builder backend errors. --- lib/galaxy_test/api/test_tools.py | 48 +++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/lib/galaxy_test/api/test_tools.py b/lib/galaxy_test/api/test_tools.py index 40e289093d7b..9dde25e9ded0 100644 --- a/lib/galaxy_test/api/test_tools.py +++ b/lib/galaxy_test/api/test_tools.py @@ -862,6 +862,54 @@ def _apply_rules_and_check(self, example: Dict[str, Any]) -> None: output_hdca = self.dataset_populator.get_history_collection_details(history_id, hid=output_hid, wait=False) example["check"](output_hdca, self.dataset_populator) + def test_apply_rules_with_error_in_mapping(self): + # this would produce a list:paired but the child identifiers are incorrectly spelled + example_with_mapping_error = { + "rules": { + "rules": [ + { + "type": "add_column_metadata", + "value": "identifier0", + }, + { + "type": "add_column_metadata", + "value": "identifier1", + }, + ], + "mapping": [ + { + "type": "list_identifiers", + "columns": [0], + }, + { + "type": "paired_identifier", + "columns": [1], + }, + ], + }, + "test_data": { + "type": "list:list", + "elements": [ + { + "identifier": "sample1", + "elements": [ + {"identifier": "floorward", "class": "File", "contents": "TestData123forward"}, + {"identifier": "reverb", "class": "File", "contents": "TestData123reverse"}, + ], + }, + ], + }, + } + with self.dataset_populator.test_history(require_new=False) as history_id: + inputs = stage_rules_example(self.galaxy_interactor, history_id, example_with_mapping_error) + hdca = inputs["input"] + inputs = {"input": {"src": "hdca", "id": hdca["id"]}, "rules": example_with_mapping_error["rules"]} + + self.dataset_populator.wait_for_history(history_id) + response = self._run("__APPLY_RULES__", history_id, inputs, assert_ok=False) + assert_status_code_is(response, 400) + assert "Unknown indicator of paired status encountered (floorward)" in response.json()["err_msg"] + def test_apply_rules_flatten_paired_unpaired(self): self._apply_rules_and_check(rules_test_data.EXAMPLE_FLATTEN_PAIRED_OR_UNPAIRED) From 6f6e97daab4dbcf61204983ecc7796a53c3654cb Mon Sep 17 00:00:00 2001 From: John Chilton Date: Sat, 17 May 2025 13:17:25 -0400 Subject: [PATCH 03/16] Implement hash validation in rule builder. 
--- .../components/RuleBuilder/rule-definitions.js | 16 ++++++++++++++++ .../src/components/RuleCollectionBuilder.vue | 18 ++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/client/src/components/RuleBuilder/rule-definitions.js b/client/src/components/RuleBuilder/rule-definitions.js index d0f0d0a0586c..9888d1535ee4 100644 --- a/client/src/components/RuleBuilder/rule-definitions.js +++ b/client/src/components/RuleBuilder/rule-definitions.js @@ -860,6 +860,22 @@ const MAPPING_TARGETS = { label: _l("Genome"), modes: ["raw", "ftp"], }, + hash_sha1: { + label: _l("Hash (SHA1)"), + modes: ["raw", "ftp"], + }, + hash_md5: { + label: _l("Hash (MD5)"), + modes: ["raw", "ftp"], + }, + hash_sha256: { + label: _l("Hash (SHA256)"), + modes: ["raw", "ftp"], + }, + hash_sha512: { + label: _l("Hash (SHA512)"), + modes: ["raw", "ftp"], + }, file_type: { label: _l("Type"), modes: ["raw", "ftp"], diff --git a/client/src/components/RuleCollectionBuilder.vue b/client/src/components/RuleCollectionBuilder.vue index 2e2005cb149f..25ea0e9e8590 100644 --- a/client/src/components/RuleCollectionBuilder.vue +++ b/client/src/components/RuleCollectionBuilder.vue @@ -1860,6 +1860,24 @@ export default { const info = data[dataIndex][infoColumn]; res["info"] = info; } + const hashTypes = [ + { key: "hash_md5", function: "MD5" }, + { key: "hash_sha1", function: "SHA1" }, + { key: "hash_sha256", function: "SHA256" }, + { key: "hash_sha512", function: "SHA512" }, + ]; + + hashTypes.forEach(({ key, function: hashFunction }) => { + if (mappingAsDict[key]) { + const hashColumn = mappingAsDict[key].columns[0]; + const hash = data[dataIndex][hashColumn]; + if (res.hashes === undefined) { + res["hashes"] = []; + } + res["hashes"].push({ hash_function: hashFunction, hash_value: hash }); + } + }); + const tags = []; if (mappingAsDict.tags) { const tagColumns = mappingAsDict.tags.columns; From 4853d2a170d01ed712f7ecc2524d99442a98fd14 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Sun, 18 May 2025 08:31:13 -0400 Subject: [PATCH 04/16] Bug fix - missing help text in rule builder. --- client/src/components/RuleBuilder/ColumnSelector.vue | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/src/components/RuleBuilder/ColumnSelector.vue b/client/src/components/RuleBuilder/ColumnSelector.vue index 5acc6d14739c..8a89c966272c 100644 --- a/client/src/components/RuleBuilder/ColumnSelector.vue +++ b/client/src/components/RuleBuilder/ColumnSelector.vue @@ -11,7 +11,7 @@
- {{ label }} + {{ label }}
  1. From 49069f1ab15087b9b770ab1b7a9fe492e47059e5 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Sun, 18 May 2025 08:41:25 -0400 Subject: [PATCH 05/16] Refactor HelpText style for reuse in other context. Apply help text styling in rule builder. --- client/src/components/Help/HelpText.vue | 11 +---------- client/src/components/Help/help-text.scss | 8 ++++++++ client/src/components/RuleBuilder/ColumnSelector.vue | 6 ++++-- 3 files changed, 13 insertions(+), 12 deletions(-) create mode 100644 client/src/components/Help/help-text.scss diff --git a/client/src/components/Help/HelpText.vue b/client/src/components/Help/HelpText.vue index b35a420a7dae..f24ea8c4fa74 100644 --- a/client/src/components/Help/HelpText.vue +++ b/client/src/components/Help/HelpText.vue @@ -23,13 +23,4 @@ const helpTarget = ref(); - + diff --git a/client/src/components/Collections/common/useWorkbooks.ts b/client/src/components/Collections/common/useWorkbooks.ts new file mode 100644 index 000000000000..86043e9b3526 --- /dev/null +++ b/client/src/components/Collections/common/useWorkbooks.ts @@ -0,0 +1,101 @@ +import { faUpload } from "@fortawesome/free-solid-svg-icons"; +import { FontAwesomeIcon } from "@fortawesome/vue-fontawesome"; +import { computed, ref } from "vue"; + +import HiddenWorkbookUploadInput from "@/components/Collections/wizard/HiddenWorkbookUploadInput.vue"; + +export const fileToBase64 = (file: File): Promise => + new Promise((resolve, reject) => { + const reader = new FileReader(); + reader.onload = () => resolve(reader.result as string); + reader.onerror = (error) => reject(error); + reader.readAsDataURL(file); + }); + +export type WorkbookHandler = (workbookContentBase64: string) => Promise; + +export function useWorkbookDropHandling(workbookHandler: WorkbookHandler) { + const uploadErrorMessage = ref(undefined); + const isDragging = ref(false); + const isProcessingUpload = ref(false); + + async function onFileUpload(event: Event) { + const input = event.target as HTMLInputElement; + const file = input.files?.[0] ?? 
null; + if (file) { + const base64Content = await readAsBase64(file); + workbookHandler(base64Content); + } + } + + async function readAsBase64(file: File) { + const fileContent = await fileToBase64(file); + const base64Content = fileContent.split(",")[1] as string; + return base64Content; + } + + function checkDrop(event: DragEvent): File | undefined { + const file = event.dataTransfer?.files[0]; + if (!file || !file.name.endsWith(".xlsx")) { + uploadErrorMessage.value = "Please drop a valid XLSX file."; + return undefined; + } + return file; + } + + const handleDrop = async (event: DragEvent) => { + const file = checkDrop(event); + if (!file) { + return; + } + + isDragging.value = false; + isProcessingUpload.value = true; + try { + // Read and base64 encode the file + const base64Content = await readAsBase64(file); + await workbookHandler(base64Content); + } catch (error) { + console.error("Error uploading file:", error); + uploadErrorMessage.value = "There was an error processing the file."; + } finally { + isProcessingUpload.value = false; + } + }; + + const uploadRef = ref>(); + + interface HasBrowse { + browse: () => void; + } + + function browseFiles() { + const ref = uploadRef.value; + if (ref) { + (ref as unknown as HasBrowse).browse(); + } + } + + const dropZoneClasses = computed(() => { + const classes = ["dropzone"]; + if (isDragging.value) { + classes.push("highlight"); + } + return classes; + }); + + return { + browseFiles, + dropZoneClasses, + FontAwesomeIcon, + faUpload, + readAsBase64, + isDragging, + isProcessingUpload, + handleDrop, + HiddenWorkbookUploadInput, + onFileUpload, + uploadErrorMessage, + uploadRef, + }; +} diff --git a/client/src/components/Collections/wizard/CardDownloadWorkbook.vue b/client/src/components/Collections/wizard/CardDownloadWorkbook.vue new file mode 100644 index 000000000000..61e632024bdf --- /dev/null +++ b/client/src/components/Collections/wizard/CardDownloadWorkbook.vue @@ -0,0 +1,20 @@ + + + diff --git a/client/src/components/Collections/wizard/CardEditWorkbook.vue b/client/src/components/Collections/wizard/CardEditWorkbook.vue new file mode 100644 index 000000000000..10fe17b728eb --- /dev/null +++ b/client/src/components/Collections/wizard/CardEditWorkbook.vue @@ -0,0 +1,15 @@ + + + diff --git a/client/src/components/Collections/wizard/CardUploadWorkbook.vue b/client/src/components/Collections/wizard/CardUploadWorkbook.vue new file mode 100644 index 000000000000..a98f3243246c --- /dev/null +++ b/client/src/components/Collections/wizard/CardUploadWorkbook.vue @@ -0,0 +1,58 @@ + + + + diff --git a/client/src/components/Collections/wizard/ConfigureFetchWorkbook.vue b/client/src/components/Collections/wizard/ConfigureFetchWorkbook.vue new file mode 100644 index 000000000000..d265efcde36e --- /dev/null +++ b/client/src/components/Collections/wizard/ConfigureFetchWorkbook.vue @@ -0,0 +1,50 @@ + + + + + diff --git a/client/src/components/Collections/wizard/HiddenWorkbookUploadInput.vue b/client/src/components/Collections/wizard/HiddenWorkbookUploadInput.vue new file mode 100644 index 000000000000..0e54ba459dc1 --- /dev/null +++ b/client/src/components/Collections/wizard/HiddenWorkbookUploadInput.vue @@ -0,0 +1,24 @@ + + + diff --git a/client/src/components/Collections/wizard/SourceFromWorkbook.vue b/client/src/components/Collections/wizard/SourceFromWorkbook.vue new file mode 100644 index 000000000000..6c2daa1b90e8 --- /dev/null +++ b/client/src/components/Collections/wizard/SourceFromWorkbook.vue @@ -0,0 +1,41 @@ + + + diff --git 
a/client/src/components/Collections/wizard/UploadFetchWorkbook.vue b/client/src/components/Collections/wizard/UploadFetchWorkbook.vue new file mode 100644 index 000000000000..428d17fbc46b --- /dev/null +++ b/client/src/components/Collections/wizard/UploadFetchWorkbook.vue @@ -0,0 +1,51 @@ + + + diff --git a/client/src/components/Collections/wizard/WhichWorkbookCollectionType.vue b/client/src/components/Collections/wizard/WhichWorkbookCollectionType.vue new file mode 100644 index 000000000000..e05697967f6c --- /dev/null +++ b/client/src/components/Collections/wizard/WhichWorkbookCollectionType.vue @@ -0,0 +1,92 @@ + + + diff --git a/client/src/components/Collections/wizard/fetchWorkbooks.test.ts b/client/src/components/Collections/wizard/fetchWorkbooks.test.ts new file mode 100644 index 000000000000..1cc0946d842e --- /dev/null +++ b/client/src/components/Collections/wizard/fetchWorkbooks.test.ts @@ -0,0 +1,33 @@ +import { forBuilder } from "./fetchWorkbooks"; +import type { ParsedFetchWorkbook } from "./types"; + +describe("forBuilder", () => { + it("should return the correct ForBuilderResponse for a valid ParsedFetchWorkbook", () => { + const parsedWorkbook: ParsedFetchWorkbook = { + rows: [ + { list_identifiers: "Row1", url: "http://example.com/1", dbkey: "db1" }, + { list_identifiers: "Row2", url: "http://example.com/2", dbkey: "db2" }, + ], + columns: [ + { type: "list_identifiers", title: "Name", type_index: 0 }, + { type: "url", title: "URI", type_index: 0 }, + { type: "dbkey", title: "Genome", type_index: 0 }, + ], + workbook_type: "datasets", + parse_log: [], + }; + + const result = forBuilder(parsedWorkbook); + + expect(result.initialElements).toEqual([ + ["Row1", "http://example.com/1", "db1"], + ["Row2", "http://example.com/2", "db2"], + ]); + expect(result.rulesCreatingWhat).toBe("datasets"); + expect(result.initialMapping).toEqual([ + { type: "list_identifiers", columns: [0] }, + { type: "url", columns: [1] }, + { type: "dbkey", columns: [2] }, + ]); + }); +}); diff --git a/client/src/components/Collections/wizard/fetchWorkbooks.ts b/client/src/components/Collections/wizard/fetchWorkbooks.ts new file mode 100644 index 000000000000..79bed4141f46 --- /dev/null +++ b/client/src/components/Collections/wizard/fetchWorkbooks.ts @@ -0,0 +1,79 @@ +// utilities for populating the rule builder from parsed "fetch workbook"s. 
+import type { + ColumnMappingType, + ParsedFetchWorkbook, + ParsedFetchWorkbookColumn, + RawRowData, + RuleBuilderMapping, + RulesCreatingWhat, +} from "./types"; + +export function hasData(parsedWorkbook: ParsedFetchWorkbook): boolean { + return parsedWorkbook.rows.length > 0; +} + +export interface ForBuilderResponse { + initialElements: string[][]; + rulesCreatingWhat: RulesCreatingWhat; + initialMapping: RuleBuilderMapping; +} + +export function forBuilder(parsedWorkbook: ParsedFetchWorkbook): ForBuilderResponse { + const initialElements: RawRowData = []; + const rulesCreatingWhat = creatingWhat(parsedWorkbook); + for (const row of parsedWorkbook.rows) { + const rowAsString: string[] = []; + for (const column of parsedWorkbook.columns) { + const rowKey: string = columnToRowKey(column); + const cellValue = row[rowKey]; + if (cellValue === undefined) { + throw Error("Error processing server response."); + } + rowAsString.push(cellValue); + } + initialElements.push(rowAsString); + } + const initialMapping = buildInitialMapping(parsedWorkbook); + return { + initialElements, + initialMapping, + rulesCreatingWhat, + }; +} + +function columnToRowKey(column: ParsedFetchWorkbookColumn): string { + if (column.type_index == 0) { + return column.type; + } else { + return `${column.type}_${column.type_index}`; + } +} + +function creatingWhat(parsedWorkbook: ParsedFetchWorkbook): RulesCreatingWhat { + if (parsedWorkbook.workbook_type == "datasets") { + return "datasets"; + } else { + return "collections"; + } +} + +function buildInitialMapping(parsedWorkbook: ParsedFetchWorkbook): RuleBuilderMapping { + const columnMappings: RuleBuilderMapping = []; + for (let index = 0; index < parsedWorkbook.columns.length; index++) { + const column = parsedWorkbook.columns[index] as ParsedFetchWorkbookColumn; + const type: ColumnMappingType = column.type; + if (column.type_index > 0) { + for (const columnMapping of columnMappings) { + if (columnMapping.type == type) { + columnMapping.columns.push(index); + } + } + } else { + columnMappings.push({ + type: type, + columns: [index], + }); + } + } + return columnMappings; +} diff --git a/client/src/components/Collections/wizard/types.ts b/client/src/components/Collections/wizard/types.ts index e1ff85de05b8..7ce84043291c 100644 --- a/client/src/components/Collections/wizard/types.ts +++ b/client/src/components/Collections/wizard/types.ts @@ -1,18 +1,32 @@ import type { components } from "@/api/schema"; +import type { MAPPING_TARGETS } from "@/components/RuleBuilder/rule-definitions"; export type RulesCreatingWhat = "datasets" | "collections"; -export type RulesSourceFrom = "remote_files" | "pasted_table" | "dataset_as_table" | "collection"; +export type RulesSourceFrom = "remote_files" | "pasted_table" | "dataset_as_table" | "collection" | "workbook"; export type ListUriResponse = components["schemas"]["ListUriResponse"]; export type RemoteFile = components["schemas"]["RemoteFile"]; export type RemoteDirectory = components["schemas"]["RemoteDirectory"]; +export type ParsedFetchWorkbookForCollections = components["schemas"]["ParsedFetchWorkbookForCollections"]; +export type ParsedFetchWorkbookForDatasets = components["schemas"]["ParsedFetchWorkbookForDatasets"]; +export type ParsedFetchWorkbook = ParsedFetchWorkbookForCollections | ParsedFetchWorkbookForDatasets; +export type ParsedFetchWorkbookColumn = components["schemas"]["ParsedColumn"]; +export type ParsedFetchWorkbookForCollectionCollectionType = + 
components["schemas"]["ParsedFetchWorkbookForCollections"]["collection_type"]; export type RawRowData = string[][]; -export type InitialElements = RawRowData | HDCADetailed; // types and helpers around initializing the rule builder with data export type RuleSelectionType = "raw" | "remote_files"; export type RuleElementsType = RemoteFile[] | string[][]; +export type ColumnMappingType = keyof typeof MAPPING_TARGETS; +export type ParsedFetchWorkbookColumnType = ParsedFetchWorkbookColumn["type"]; + +export interface RuleBuilderSingleMapping { + type: ColumnMappingType; + columns: number[]; +} +export type RuleBuilderMapping = RuleBuilderSingleMapping[]; // it would be nice to have a real type from the rule builder but // it is older code. This is really outlining what this component can @@ -24,4 +38,5 @@ export interface RuleBuilderOptions { elements?: RuleElementsType | undefined; content?: string; selectionType: RuleSelectionType; + initialMappings?: RuleBuilderMapping; } diff --git a/client/src/components/Collections/wizard/workbook-dropzones.scss b/client/src/components/Collections/wizard/workbook-dropzones.scss new file mode 100644 index 000000000000..3f7c822753b4 --- /dev/null +++ b/client/src/components/Collections/wizard/workbook-dropzones.scss @@ -0,0 +1,9 @@ +@import "theme/blue.scss"; + +.dropzone.highlight { + border-width: 2px; + border-color: $border-color; + border-style: dashed; + border-radius: $border-radius-large; + -moz-border-radius: $border-radius-large; +} diff --git a/client/src/components/RuleBuilder/rule-definitions.js b/client/src/components/RuleBuilder/rule-definitions.js index 1c1c87c071db..abf369514133 100644 --- a/client/src/components/RuleBuilder/rule-definitions.js +++ b/client/src/components/RuleBuilder/rule-definitions.js @@ -2,7 +2,7 @@ import pyre from "pyre-to-regexp"; import _ from "underscore"; import _l from "utils/localization"; -import MAPPING_TARGETS from "./column-targets.yml"; +import MAPPING_TARGETS from "./rule_targets.yml"; const NEW_COLUMN = "new"; diff --git a/client/src/components/RuleBuilder/rule_targets.yml b/client/src/components/RuleBuilder/rule_targets.yml new file mode 120000 index 000000000000..323050dbe118 --- /dev/null +++ b/client/src/components/RuleBuilder/rule_targets.yml @@ -0,0 +1 @@ +../../../../lib/galaxy/model/dataset_collections/rule_targets.yml \ No newline at end of file diff --git a/client/src/components/RuleCollectionBuilder.vue b/client/src/components/RuleCollectionBuilder.vue index 3ada38a3e849..8a24f3cd2290 100644 --- a/client/src/components/RuleCollectionBuilder.vue +++ b/client/src/components/RuleCollectionBuilder.vue @@ -689,6 +689,11 @@ export default { required: false, type: Object, }, + initialMapping: { + // only respected if elementsType is raw currently - other element types have their own default behaviors that make sense (e.g. assigning ftp paths to a URI implicitly) + required: false, + type: Array, + }, defaultHideSourceItems: { type: Boolean, required: false, @@ -735,6 +740,8 @@ export default { mapping = [{ type: "url", columns: [0] }]; } else if (this.elementsType == "datasets") { mapping = [{ type: "list_identifiers", columns: [1] }]; + } else if (this.initialMapping) { + mapping = this.initialMapping; } else { mapping = []; } @@ -1293,6 +1300,11 @@ export default { this.$refs.hotTable.$el.click(); }, 200); } + // is this comparable to watch immediate in newer Vue code?, I just need that event to + // to flair if it is initially okay also. 
+ if (this.validInput) { + this.$emit("validInput", true); + } }, methods: { restoreRules(event) { diff --git a/lib/galaxy/app_unittest_utils/fetch_workbook.xlsx b/lib/galaxy/app_unittest_utils/fetch_workbook.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..8d1e3e394f03de6df30de887a4f8d468d20b631e GIT binary patch literal 7341 zcmZ{J1yodR*Y?mLozf}Noq}}8P}0&!4CRnRgOr4X64D_lAl=^uARuK$@^bEr7X=`scEs)5TV`)`?ht;s_4sPx=m&9< zOZs0tO%^+1&!jL8$psm~F6V=D`GY7I=x~6|`bT+n0NE zy+L2lgm#}tiijvTRaejRIi7zOHh&kBpM=LO0jFV6rd67KCUGAP^2t;#twfB?Z)>*B z%X^J#3!C4WvLJAp$zc*<;tQxU(V?c}$1HA_4gjjfX?j#NM&L;_Ei1Mr&QYciv_D;- z1kwVJ$>ZuGx-euAXh!xtk^xWnC#F)I$r2FJ1EVV$RAx`_)+q8coC$RldT6-(L1Fa75aM3A{NJ1Gg8GoJ0_ zEwD`P?Tk6gpF1@fPHGp4gBv~?6}1BN1%p%gz1eV-^?F6~F7t|r$Eyl!XSWmbuae$Z z>3k``Iivu3Wpp~vvvt_6GN^>-lS)fROF@3TT`xKIc<$o^aUDNL13 zRQ)@Izxi`3i?-Sn5dcsn0RV8J{&WGeIhcd2K)+XxUmo4n>bEB0!)ko`QxMDjHS=_& z5m5x3T+9luozawRFzI(a%UEpZ8cGJiTKw)3QJ-{+VL6`#H~JYw8E^fOY`esC1)qBH zz%^NIZ8b0*;`LK9Fw4d2?VxQyt=pbh&kz-MGE8w;M7pyc4-I)P612}~k-+$dvfL!Y-18 zrGS}_D#+Jh{1lBh$}CEx=(D@G93B9F#?$>4B3s|P#G$If<|j{TKJsJQuy{Bp7u%j% zP(D?nPaEbi=u$c5r4o#hCZo&n6UGhPe6YS=3rYL)te9Pvy`A{iB}G$MpES^4V`G}i z=QU`b^qhw_!mw9-+;OT)wsw>r1lSV#W@*H5_G{ZT%zPbT_Sv13nn{*zP=OrLUcW=- z5nJUOG2tk~8<65h^7aM-}qFWuixqSR{6 zt1bF2@Z1gIjL4*G`t#F9jKecon3N1kV$~^MJ8ubP}ZE zc+R#0Sa|OcT|O+0kksMz_|x{Ib}J3(R`_B<(10jw~$3S~)`3Q$ai zbrY*Q>X=epF1;r^FUYhoaX2)_+ODJ{1)B8+v=UK^V64scX;CjawRX)|J3w>79f86N zWJxay_dY{%5=K)Uz>C0-G#lh?@vR))AH}c9D6m(C_sPF~)^oO?uU?bz-&Y>%7<(5f zOvdlSy;v@xdf59To2z=Jq<>#;Y@;vxjQ52@L^xKQtKR|cM%vxTk2NMLMD6{f#;za){>d0do?Vgi;<%?Z*$)0R~?iBx2gTpX(;*=<4!p36T<{h z6d^@@+;Tlrm}|)5co{;49*27SrXM{KMV7$d>kd>_s&7ukZ38<#%bWm)Z939(W)$xf zEY53<`smeGo!ed91gu(y*W`kdacNzM_6;;$s*d<)X|Q%ekOu2&ZwmJjAFgWam}3XE ztu_$|64qP|z_n`*7GGXwhLYu=(C~fR`a(-epo~eN0B4g`Jv|gfcUVEckHJbXwJYYE z7nUQ7fs-T=rpH5Lz>GRlBKabgeKsD%zA>*xJqs5gjKNiLJ`RA2#X2C8&g--}Z(P?PK5$kpIUk5}2u;Z-H_sLml~9>_=0L-t)j$e;7Z z*6YP6Lot0e;#37W8DmK}^RJb8&QI+hR=yA-oJ&lnU<@LbCa2hXaezk=UCqNYUA5#y zKo>nd9#fp?c3@S{O=i8RKjP|6O{S%xs2G!$p)r(SGy#XVQjd>jnD1X?fuSE;d(_4p zP?x5JLUNw{ybNWshA;8HIKNfFzv9NXRgtXAUu{ z880}7i3uvgdtQd-&xP;0ZDqvOot!M%=adOWm1VCqvF*YoUzs;|7*Twh9vO4Ku8IT%@W-CS;C^U|ttER6Kkjjn+FFy}6|7jkWxJo4`?Q#xdn_`}qwk*7bDnxr4#8I7;(mOj%G>ylIh>c;DoAgzTL zHx~M!zpq51_fg^-&!mS#bmqLD+ob-0SYmB||2;WOVdJC{saCLXy)r{A(U2q@ov7gRWyU!4;`TmUn%xtT1AQWgdJ^!h*x6Ddbg*wH-p(?? 
zE5GmNC6{?BqeG4~#6K-XBA*atr5U_`-a1QP9l7PsC29#1JUL>bs47t-h+s0OaG#gv8*iZZ?0poqxdchZhP z=VvSUZXrBK4?_cH>d(zGXqp>bP+EcGV|gah1PK8Em ztbdwXlQIi~<|eSijd{7H(T9oQk{DTTBo#By-v2EPH_aiI9Xttyp~xrXiDMTdr6*Tv zI#^%A}IJaxg^wBfr;x_}`3CXln+Qv)cs zoL{9EM-#7*FgV);stOIE*{XjCVZY*m6i3*>yv|txMIN_Ds3*OB!{#?nYIx*aV;^Ou z-~}3-MB99cF>S$r={X7-m(^XK!nYe5Kef_tWTMy6uz2wJQhess@TEDCPKgrs?Wd&3 zY9W$$lN^UI3PdnKZ+B0dExUiM993ovj(P9 z-bMzfFcKlNYm5f6D5?nEP$XHMi?OU)|`N{mneg%AnXw7&v$f{x~)I3RP<qxS%l)E~-BfdJWSNt5N`^jzG$;G9iE`^#zLKr3 zi|lxtbzqFvsuX95$Kpx;SV~wax7}ujhi5DA(ENiZpeaUj0&RD*9QGo4a~hsrb6yvf z1;gyMh#%s^poIhw_)QG18b{pViG(eleKxVI<1N%`|6LZ~Q;!r9Lh;HBEuqkV;}zoI z3I;)bC6JlwdNzsdc<)onAG~iB(YZ~4@M;RG7FJpEzpdkt8z*$Yk9w9L{;akksIE;nKPg);*gB-V*;v{XQS74 zEi?j&%QjE00dY;33S& zUvge)2CW!~!VeqF&ag%01R3gd#7_${uLT*M8PY{R*TGP$vz2~Qgt{qkEFgqvH$i6y z&s1)y1{wf7&C4}HcrWW56dQ;4>Y1n~kqY4|rNHpQ)>G>K;;gW~7)%iGTc$<5c*RKO zPY-qz;7PyxGtwRs>LgnK6CZb#r28oU4;=#MF zFu%eB$6Cl>(t<@PW`@g(x1OlXrx9BDY0X3|>WeJq<#Ntc<+Aw?cYL`4iv^sB0vvL( z(oV8C)({~#G}LfarGbOpXP(~`)f}VgbRtks(&VxG$_qxS;i=k6J9qfJALz1?Z@muI zB5F5o5;efz2j)}j%m*wXu5$&+_pA4jWyyGB=L|y-j4Ev+Z9-@4n%vdzwfZ}fSyl%R zv0IMgWo~UEJ7f*+9WP8A3Dcgg6?WRg{fzRh1d6@xah35iqoa~KNaN!%J7$`bQp8kR zs0<}}UQp{#a#F&Bm_njMAuJ#|LWj|mN#YVUUy?TW#+Xfg(h|wkTX`h7d~S|HIO}Ki zy`C5Yf}~nuY4IJ8-0;QDS>5`EK-jHRvHoQlg!rb{ zrQv9Gdfct7J5~%&_byI%uc&%zUJjlGe?S90aK`}C2r0<QKZ7ULGv*DBoZHz}fr@WH+rxFcg|11r9 z9Pk4*Y*=)2rt3Xh9zBvQ!5U4srOU}VpowTWEUfSi{N4?3eKlo7E z^12SYeM2a67NE_A|K^vkHZFPSP=5IgWgpDn{PHUwFtV|E_lsfj)n=_%d9mKJ4S7*s z7JeMl6W{d*0SkOguubMO-)pnukl{v1`lK5pgxp)9>FgsdYahM4W?G@mea>Xp36e_oDXNQy$x!wF;#@A0OmJD z3zqY=mGiMW3&r>1PW1@=(zyW^SmPP<&cS85i}r0pGLr%%V%T04ri+;!UBcggl^9H}E$jOrB~XB?|N> zo-nrzJ}vRqu;YPw#LA4*y`LJPpBKIU@DMc>)ri`1$bq;?sl=?hr9DT0VCe}Z{oPl#TmH|< z>?abP-4@t>p1U}Fha>d;zU*hQJ_g~i7~(Lr-%JD>gmCzZxprZCF>~MX$`i{*Cgc*S^#`9K8g6{h7RUb3ZOclfJB99z8e6ZnD1K z?2kefm6^D<|fNY_!>+krmOeAq_2I0qV}8}y)~E4X+&>Akqp zMa}+zr%@dJL%Ca-BSR4RM8dX^2-&{(mg>r@5kg{7`K&g0oTKK1sw<&2Yabf7&u(!{ z%T-}>RKl3Waoh>I^%jrQr%I>?9rM4%zNn$t(E55U@-T>m&LeqevKdU}k?-L< z;MU8>u#*;rfdMmEWuht~8-$A2;7btm%Gl_e#gdk%MzyC*8EF4$k;$i<#Mi>vUF5GJ zSSTo~Mf>jJ0~2 z6LM^DHUX{&{hdx1JQyo*xMwX)Il(yaZn)qBK?IxrlYTg4qrC!h-&Y8@IJ?_Bd3&fB z2@u>G;m9uLuXy|$m-Mtmff-rUlBYEz0o;}Y?2eq4kRh@jD9iMguX$hlsqPl|mRD6# zCNP+;Kh#Lfo+GCCx>5|E5TO8HqPqfye~wWxUA9P_BOlk_nku*sylct0Mq4M%lOxaN z5%LIdpbYGKMd8H|30Y5%uTwu*b`ZbmVQ88?X6`*bFkS2J9AAVQ;=fHLF9wTk0HvRB z_}{%S9SIw22avUcftsr=2%`57(L^aIwy|URU&zc+9TU)6Yf<_oIJ6AnFe+obT>%=8 zm#_?$msgpsR3fL1_|AM{>HK=pSYPszxJV$8g)0515U803hW+}es{TR(a^#jpZO>Q3FRN${yBN3%BB zBlF2fazj`CM4qaQDmT3vEMg-=t%)~%xAFEs{RzG=PX+b!Xt`{!>NTA}U#3wautGCa z2bO)2MAJ!KOXa?QMv$}wuX4m!^-e;2(0X9liSxl`yJ3V-RHKDF`pjg=XIy%ANhM+( zz&1EyBVDy)ulnK&a`f!lo7C9v0e9DaJMW}f9ILzejS3Q34FcT*=c&R(>ch_l1wp9~ zoCGAj^XUeL;=cP8oJi#4gNIWO%37Au@dIUDH4 z>Hg}E)vbz!8R#2cs84>i=b^nxh`FIX$VAn_-oo1KpWIS6Ozf9Kq@WIIIc|*iX7meR zWepFXDi%EjsJhGOwy!S5N1dN%!t!c0Uhrf*%3LRm>^zJGFoXYy9tZ! 
z&)`Y#!tQJxn??26E^4@#eB%wp$OSt^x`5t=C zlJC-yS=<3d)-21Hr)E9v?Wt>(7T;1+GwfOzubf;K=H!$KKZsGlG1RC;)ZJg_y3>Z~ z#`gSF!B$6+>W+9J1i%v-@%t7OB%X}M@w$u06cyzRg{4Jdc0ugks_VWL&ppFrA+Z22 zkY{}3VUHh8anyM1MAGLW$Gb#!y3RJ2-c;_kJ;n{v5!N9mUMoHDd_u6sxqHJjgODkG;e+~}}?2~^E%HQWWP^sK{^3K)i-nhv60PyYi%+Q^dt literal 0 HcmV?d00001 diff --git a/lib/galaxy/app_unittest_utils/fetch_workbook_paired.xlsx b/lib/galaxy/app_unittest_utils/fetch_workbook_paired.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..a7ec750ede7bc5d537fdd58d04420600e986d6a4 GIT binary patch literal 7927 zcmd^E2T+sSwuaCG1c-n_L=QwE(pvz5NE1O2DGGujT_E({>!Bk}q(~DH!GM7DUP2cT z2u(zKM}^QtAoLe>?v>-*<2!TTeRuB6UCAW>|INSG-v77PUVE*z)s%>c=?Q>9AVIob zu{Oa8VZ>j%S@9V=7`mF{*Jh-XrPu3K;isNr zvtfGt=q+`|m`~pg5pz$%->f^Oc2>J>oXT<6jOe^BE*8_0c$_X>*S$!+YFiS|F6uvI z5YQ&C6WwaFR|*k+-BX)&VcAnJtN4_6M6>n{O$qTSLC~jxQmQ!?RE+p5a*U#jCMI`L z?u#@q*|ViG4K}mbNR&)8yK$&I6j&}yu`QE&ZdVi=WC5~Z*s|Q!eMGwzzjuj$S#m$ZvadwE5v2$4!=}o-uD!EXhuckx>Br`G(AR#0mAR{6mQ2XDgbhUM` zbTl(DadPDQ_V=C2@%y$+Z4lBa{oW&%HOVs{uRKtoaFFgL;IR-=_Ec8l3NO4?%<8S+ z>#eA$c@AOCy!}QmX3`7MnNaOsh;ZE6+^ZRltr&2r&?&$mr-!{<1Yh$GX>|_fZVdPM z?oH=PB8Hb4a&udkJ%m$xe3D#B3iuA~wruLW>uQov2Oa5+7Os*t!>gBjR8?IQE)R?w zZClkPX@;og4~^Ee9UtAyw7$+%Wz*-I8=xrWbMZ1qm>XnN!o6eJ`JY%g_Q zb{()d>NQ4;^KONFUbT6WG9F^%x+-PqT2VWe@4B>fP|~?M{vfLv#H*UMoiK0^xN8e@ za^-+m-?!4QANF!Ks}k#)ng8Nabu{mzr)oX%tY^YXtB2XsYf~t4Ht}P~hLo?T>s~s} zyIo|p2i;{a?gQW3PT%)UZQ2(KRMnnu{WuwF;-L`qZNlK0ZeYg;KT7b-G8E+TGM2CzUG( z=GWV{Md#96D?Y33_AEOc*j4V7_6)Q<8ra3*n#X)AkNoOBJkvoBpC>)A*G5;1skpc} z2Srejq!}H9S+4f21*x%YRvjhYrmp1F8I+!u)X{3>D0$DWbwe??C8TMLRhg}X< zj6KhJnz>!UE%chWPA=IQ)->1H@+}~e52_E;*1{b;&0J&|7TDCskjb2<^mDeoMo?rko&U%9=CO z+&3M0-WkWhVug}2J6M%r=7Nf`ipe+ojf-Ki!by1@#>y~D!F#bC$v5ABhhB8hE2GQ= zF7)+LfF)^z-+)*mKR=d2Ffq)(K_1LV4&ah8fB~_@e(5ZQ^u#Ft22C&{CBRa~fES1* z@oQu$WF&?KG?;=JsQ|e$1|mQ#soyk9;W=VdK!XdIkp{3NV;~8{lKBxr3t5O^fepc6 z#!~>Us|NBwEYJ@MEo3D|1vWeZGtvPpuNtTTvE+U-&_Z@%SWp8B%y=4*d(}V_h^6o| zgcfoWqkLQ4E-zM48xdT17t@6=tJi}88lS^#d!mLE&~2?`@i1Pzvs#54-i^}tPxTH3woFe zErPSU1h-)RS#XDr=CD0LYGaLrM#4`1@I!#q_7sgIE|%u-6Vbn{lmFuw|D?IGOK=w^ zdKe;zkTpfNV?jx&&=UB+9^5-iiEj5VD4y!%_G%QTeEPVcbV`W_r%j3efVZLl&z}F{ z+XSVi7sBaXf}1dEA0QM6Swkc{7WD4~aXC@wyo`dSPn3w=w^90qr5BNN;fUSED0J<` zf8%gWkl9#Jd}?|zT*D>!Ek`tPgqcDYI+IW$t4(r=^cjr*MWwGaF9!IBZmJ2gcQ+ifMmpi0#g6` zz@~j)+2+qHrpa5Ad|Tn+B`NwM16wXt9(DNG+IY{)-C1(<(9Jp29-*7ogMQfNSFa9c zrbO5Ai5^sro7U(o9~BSt4dsqcmvqkze*M_GHRw==aIczRhB+rPkwoxY*Q=zBJvPESJ{L3eV_TBdiE2p{X+^BfntZ)%Y1*74EbmGqiV?CMR zT%kQws}E;h$KBf$l|BP~!I#RLNthki(GIkrA)PPI$v4?GU)yRiW*T2B?Gr|hip|(! 
zd!03>b=78ne}?gb?p}Ti#6W z4l&D2$E1dn?QIg{v)~cHuSli)PO6iciM0vex8H9GaJQDO?I;WV0m&Xs==LCUQLL!N zZQ;)^B&?E4tw!KQMcrXVB*wZ&2mCHp`-7U6oP&IRomC0VRSj%Imwn+BhX%X~nD@O^ z#n1-su8U1E!uvZ9j_k?(qC@3E7eEEm7tgGjZcLwjZ5##%#UbdpN`qg;wZ1LBmffYG zGf!e_!Hsbb&RqP`5+>@QW(c>aDwn7byvCS!u%ooUAqs<{JDb&mlk)uSZa&s{CwGH_ zkWe;h#!W%s3BU%!Ds@Z8BeWlU+XpDTK`2t~`j)vdb~egkjjT_&bd&6LciA98_PLp@ z-CTQNmTK3u@Q<47!IgSfsH6?EH3goIj`F20>qZJo-*II0MDCRjwKA1Qu$@_EUBNWlyS zS9Hl)k{9O#Ccmz~qi6~25Ae#wh4;2xQ&B9{k{|C=iJX$lN#1liuhaC{hUi(R{xwtx z{b`G{*L$Kv#horlKc|lDTA|#hr#s zl~%X}o2l#YO8XbUs!`HPG=KU1guo&yb^2-aPE!@N|GGv^hR~VLiuN+$S|;|Tt%d+` zN4S0v`K_YhqUv`Va4wB2jkG|p@;h0!xU$|O#}ay-b_8ob1u7(G6M4;un!V%g1=CEw z;wLGyyx46e-onW=2TiFC(I|WNOWf94k#}U|uj`ZZB6buXOJ?>FiT2YSMa8KdHgS+d zghodupPGKNU~>nntF?LSocr?UHW1Hk4K4Z&!Gfvk@m_)V=}B8!CXtG6H@6b9t9uZe zyxk80#fi186Z!2-d8V^@`+YUMcSlsU9u6@O2aGs89kF^kqOn{#70{4qBblCpNb1(o z3^>d$i*S9pvU%8$%9G-IywWHZ41_uGwWccSU#&Fy?<>vG%+SHaSl!9txPEtBwkvzw zZPCmR4orK#%Hni`rUT{aBFB4xJC9ZAWXHuRp6RBC27cweW z*#QkpShULLGF8t8C0eg<5gs<#qD#j_Bd|2TZK!&2sl0R#b{uqH$vU`}XdR|=E?!y2 znZIFPe8K5jr~VDGD;)AORYatN8lFx^G5cS&1Z5b>tX7bBp75s~GshhmJ%+>c(uVg_ ze_pA>?+nL9Pe&(rE0b@OwchErdCCv=@ka;Dd%o`!d6*0g&AFjbClq4!QZG^<{PYpa zXl8U)4`VZL?&}fjxa@Nc8?&n^ke`_}>rt|{NJd9Fu)zHg<#Sle&Q4DrYZu!BG9)Xoo z9eWVH4`W7?yEfDEc_Y+-WV2T61=Vr5#|zp;dJ4HOaS(IvFVciB75(MaTQhnXIOWXc z)S_f=TCkNC$tYeh_DZlNf4|&_&F`|il{d{1rnOfmV=&uB_&nM{e~2n7>`D;P+Snq1 zSmt26AP7z?{gKUBgvU9HLYT7DdVURZwLk27Vm9Z^IKXNQZw#MXfnM9_5Rx=H%Sq$~ zzY71CZ9j`K>{+g_v+wDf9trB4(krwpeYrlt9^rFu#Kv7JS(ew^TeGr*xmlO~BPg%i zGxLz9%4@%8KVh{)73HD>YVA|NmXVFv@fhPgD<81Qn_S9d5Fd%EKF^1&`YVH>`ED@Bk%Ez}t>w4kc#IOhI1Ub0?M2+32vC&c3RU>-`rhpQbDFoq5tlYdkY+Er;v`J>_m-^7 zXSlBwl9(X{W*IU3MF>>e%D%~ET+#?-;SvHb#FjMYQ!zT?H#y7QX_|&sJoO#G$YfsG2Wgx^|PWe z3rRUGB7HD5`muTh2scpm>{Kx!KKQW=hL&dSTZ#;ae%x*l2 zA_Mn-5G-PYDv#`LrHV|6gRe5^Jv?gNKk79rIXLUF?#JQU$dw&NOYA{s_3=K*YDmJ~ zC2&${MK#TDry~*#?@wCm6%Dn(hn)o|sn_rMT@35FZ8~e$TLnBNF)0M4O1_KIl(&wm zmaF3Iyq>fGQPVdLtGg4uRSvLcym)vCA*777W9TxMR@LHUUFUXquEG=JukS9B>)$M` zhrG+v5p9m$p}G`Wg1%F)S32=Ia4V9Q&rL<3!nW}WH~9vnUjT7!mnbQZSwj-l+8)lV zs$T|Wl=vsU)Afy_$Q~JLe||7YFP&S6>DFyK!-(`k0hkrlZ_mqR zq0(9AxlPNPmf`IatHqWJEc7s*| z=Q0jH(T+r$t5TH>v_=8HOUk>>UCI6p2@!NK8$ex@>P(ckt&FvZBi-Tc*&eHBQu9vC z>jQXYP#P>Rt}&-n+o-u+a8B!c!+cu*A*+%tKgC_xA*<9x=o{|zhTIt6D|hSgzMKr0 zX`9YcjDHdKU;6gr`18K3t&Nk3jg!8HyPb)n?zc@*y%QhAgZBZ4&@ZQjZNyjtleZdX zfeLXncd^P32Kn5_hbBuDwOMmm{NCx+mbW3`OJ!LbsU@wXbV*ZO0vv5Gqzn8-eC|Db z)VlFWg+8v)vas<1+m?cnWKA;f)~Q!h#;WmV4pG~;V=Q_J>o*#Mj{JRnR8;Sx@GYfD@$-GGBoy)7jlPvQL zb~ia)HGKkDYxp^6;-5{D2}i5de{6|FLHVSppALkm6q;b5-9*>gI@p$aJj0uVT-(Ls z;l7mTVite*sWrS;5IzGAIMLtqSlr{iyX+0TtnI+bH*`$J0c6NK;gPh9WMdQPs`V;A&879f9cNIy3`X>@<& ez7Id1f3UpOlt}O!H~|3#{*#P%h1bs=-~A8A_r`+& literal 0 HcmV?d00001 diff --git a/lib/galaxy/dependencies/pinned-requirements.txt b/lib/galaxy/dependencies/pinned-requirements.txt index 3942ec3b2e57..5c4d1587b8b8 100644 --- a/lib/galaxy/dependencies/pinned-requirements.txt +++ b/lib/galaxy/dependencies/pinned-requirements.txt @@ -116,6 +116,7 @@ nodeenv==1.9.1 numpy==2.0.2 ; python_full_version < '3.10' numpy==2.2.6 ; python_full_version >= '3.10' oauthlib==3.2.2 +openpyxl==3.1.5 orjson==3.10.18 oyaml==1.0 packaging==24.2 ; python_full_version < '3.10' diff --git a/lib/galaxy/model/dataset_collections/rule_target_models.py b/lib/galaxy/model/dataset_collections/rule_target_models.py new file mode 100644 index 000000000000..0fdf59d57b31 --- /dev/null +++ b/lib/galaxy/model/dataset_collections/rule_target_models.py @@ -0,0 +1,70 @@ +from typing import ( + Dict, + List, + Literal, + Optional, +) + +import yaml +from pydantic 
import ( + BaseModel, + RootModel, +) + +from galaxy.util.resources import resource_string + +RuleBuilderImportType = Literal["datasets", "collections"] +RuleBuilderModes = Literal[ + "raw", + "ftp", + "datasets", + "library_datasets", + "collection_contents", +] + + +class ColumnTarget(BaseModel): + label: str + help: Optional[str] + modes: Optional[List[RuleBuilderModes]] = None + importType: Optional[RuleBuilderImportType] = None + multiple: Optional[bool] = False + columnHeader: Optional[str] = None + advanced: Optional[bool] = False + requiresFtp: Optional[bool] = False + + +RuleBuilderMappingTargetKey = Literal[ + "list_identifiers", + "paired_identifier", + "paired_or_unpaired_identifier", + "collection_name", + "name_tag", + "tags", + "group_tags", + "name", + "dbkey", + "hash_sha1", + "hash_md5", + "hash_sha256", + "hash_sha512", + "file_type", + "url", + "url_deferred", + "info", + "ftp_path", +] + + +ColumnTargetsConfig = Dict[RuleBuilderMappingTargetKey, ColumnTarget] +ColumnTargetsConfigRootModel = RootModel[ColumnTargetsConfig] + + +def target_models() -> ColumnTargetsConfig: + column_targets_str = resource_string(__name__, "rule_targets.yml") + column_targets_raw = yaml.safe_load(column_targets_str) + return ColumnTargetsConfigRootModel.model_validate(column_targets_raw).root + + +def target_model_by_type(type: RuleBuilderMappingTargetKey) -> ColumnTarget: + return target_models()[type] diff --git a/client/src/components/RuleBuilder/column-targets.yml b/lib/galaxy/model/dataset_collections/rule_targets.yml similarity index 82% rename from client/src/components/RuleBuilder/column-targets.yml rename to lib/galaxy/model/dataset_collections/rule_targets.yml index f6c4ece83a46..b39caf5dea49 100644 --- a/client/src/components/RuleBuilder/column-targets.yml +++ b/lib/galaxy/model/dataset_collections/rule_targets.yml @@ -76,31 +76,50 @@ group_tags: name: label: "Name" importType: "datasets" + help: | + This is just the name of the dataset as it appears in the Galaxy history + panel. dbkey: label: "Genome" + help: | + This should be the genome build (e.g. hg19, mm9, etc...) that the data + is associated with. If this is not set, there will be no genome associated + with the dataset. modes: - "raw" - "ftp" hash_sha1: label: "Hash (SHA1)" + help: | + This is the SHA1 hash of the URI, this is used to verify that the file + has not been corrupted during transfer. modes: - "raw" - "ftp" advanced: true hash_md5: label: "Hash (MD5)" + help: | + This is the MD5 hash of the URI, this is used to verify that the file + has not been corrupted during transfer. modes: - "raw" - "ftp" advanced: true hash_sha256: label: "Hash (SHA256)" + help: | + This is the SHA256 hash of the URI, this is used to verify that the file + has not been corrupted during transfer. modes: - "raw" - "ftp" advanced: true hash_sha512: label: "Hash (SHA512)" + help: | + This is the SHA512 hash of the URI, this is used to verify that the file + has not been corrupted during transfer. modes: - "raw" - "ftp" diff --git a/lib/galaxy/model/dataset_collections/workbook_util.py b/lib/galaxy/model/dataset_collections/workbook_util.py new file mode 100644 index 000000000000..4d62cdc6ab5d --- /dev/null +++ b/lib/galaxy/model/dataset_collections/workbook_util.py @@ -0,0 +1,141 @@ +"""Shared code for working with workbook data. + +Module was developed for sample sheet work but extracted for reuse with fetch +workbooks. 
+""" + +import base64 +from dataclasses import dataclass +from io import BytesIO +from textwrap import wrap +from typing import List + +from openpyxl import ( + load_workbook, + Workbook, +) +from openpyxl.styles import Font +from openpyxl.worksheet.datavalidation import DataValidation +from openpyxl.worksheet.worksheet import Worksheet + +from galaxy.exceptions import RequestParameterInvalidException + +Base64StringT = str + + +def freeze_header_row(worksheet: Worksheet) -> None: + worksheet.freeze_panes = "A2" + + +def make_headers_bold(worksheet: Worksheet, headers: list) -> None: + for index in range(len(headers)): + make_bold(worksheet, 1, index) + + +def make_bold(worksheet: Worksheet, row: int, column: int): + """Make the target cell bold in the specified worksheet.""" + bold_font = Font(bold=True) + worksheet[f"{index_to_excel_column(column)}{row}"].font = bold_font + + +def index_to_excel_column(index: int) -> str: + """Converts a numeric index (0-based) into an Excel column label.""" + if index < 0: + raise ValueError("Index must be 0 or greater") + + column_label = "" + while index >= 0: + column_label = chr(index % 26 + 65) + column_label + index = index // 26 - 1 # Move to the next "digit" in base-26, adjusting for zero-based indexing + + return column_label + + +def uri_data_validation(column: str) -> DataValidation: + # Add data validation for "URI" column + # We cannot assume http/https since drs, gxfiles, etc... are all fine + uri_validation = DataValidation(type="custom", formula1=f'=ISNUMBER(FIND("://", {column}2))', allow_blank=True) + uri_validation.error = "Invalid URI" + uri_validation.errorTitle = "Error" + uri_validation.showErrorMessage = True + return uri_validation + + +def add_column_validation(column: str, data_validation: DataValidation, worksheet: Worksheet): + data_validation.add(f"{column}2:{column}1048576") + worksheet.add_data_validation(data_validation) + + +def set_column_width(worksheet: Worksheet, column_index: int, width: int): + worksheet.column_dimensions[index_to_excel_column(column_index)].width = width + + +def load_workbook_from_base64(content: str) -> Workbook: + decoded_content = base64.b64decode(content) + file_like = BytesIO(decoded_content) + try: + workbook = load_workbook(file_like, data_only=True) + except Exception: + raise RequestParameterInvalidException( + "The provided content is not a valid Excel file. Please check the content and try again." 
+ ) + return workbook + + +def workbook_to_bytes(workbook: "Workbook") -> BytesIO: + output = BytesIO() + workbook.save(output) + output.seek(0) + return output + + +@dataclass +class HasHelp: + title: str + help: str + + +@dataclass +class HelpConfiguration: + instructions: List[str] + columns: List[HasHelp] + text_width: int + column_width: int + help_row_start: int = 3 + + +def wrap_instructions(instruction: str, help_config: HelpConfiguration) -> List[str]: + return wrap(instruction, width=help_config.text_width) + + +def add_instructions_to_sheet(worksheet: Worksheet, help_configuration: HelpConfiguration): + columns = help_configuration.columns + num_columns = len(columns) + help_label_index = num_columns + 2 + # why is the width not +1 + set_column_width(worksheet, help_label_index, help_configuration.column_width) + + help_start_row = help_configuration.help_row_start + current_row = help_start_row + + worksheet.cell(row=current_row, column=help_label_index, value="Instructions") + make_bold(worksheet, current_row, help_label_index - 1) + + current_row += 1 + + for instruction_index, instruction in enumerate(help_configuration.instructions): + worksheet.cell(row=current_row, column=help_label_index, value=f"> {instruction_index + 1}.") + instruction_lines = wrap_instructions(instruction, help_configuration) + for line in instruction_lines: + worksheet.cell(row=current_row, column=help_label_index + 1, value=line) + current_row += 1 + + current_row += 2 + worksheet.cell(row=current_row, column=help_label_index, value="Columns") + make_bold(worksheet, current_row, help_label_index - 1) + current_row += 1 + + for column in columns: + worksheet.cell(row=current_row, column=help_label_index, value=column.title) + worksheet.cell(row=current_row, column=help_label_index + 1, value=column.help) + current_row += 1 diff --git a/lib/galaxy/tools/fetch/__init__.py b/lib/galaxy/tools/fetch/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/lib/galaxy/tools/fetch/workbooks.py b/lib/galaxy/tools/fetch/workbooks.py new file mode 100644 index 000000000000..40c1f08e6c84 --- /dev/null +++ b/lib/galaxy/tools/fetch/workbooks.py @@ -0,0 +1,503 @@ +from dataclasses import dataclass +from typing import ( + Dict, + List, + Optional, + Tuple, + Union, +) + +from openpyxl import Workbook +from openpyxl.worksheet.worksheet import Worksheet +from pydantic import ( + BaseModel, + Field, +) +from typing_extensions import Literal + +from galaxy.exceptions import RequestParameterInvalidException +from galaxy.model.dataset_collections.rule_target_models import ( + ColumnTarget, + RuleBuilderMappingTargetKey, + target_model_by_type, +) +from galaxy.model.dataset_collections.workbook_util import ( + add_column_validation, + add_instructions_to_sheet, + Base64StringT, + freeze_header_row, + HasHelp, + HelpConfiguration, + load_workbook_from_base64, + make_headers_bold, + set_column_width, + uri_data_validation, +) + +FetchWorkbookType = Literal["datasets", "collection", "collections"] +FetchWorkbookCollectionType = Literal["list", "list:paired", "list:list", "list:list:paired", "list:paired_or_unpaired"] + +DEFAULT_FETCH_WORKBOOK_TYPE: FetchWorkbookType = "datasets" +DEFAULT_FETCH_WORKBOOK_COLLECTION_TYPE: FetchWorkbookCollectionType = "list" +DEFAULT_WORKBOOK_TITLE: str = "Galaxy Data Import" + +INSTRUCTION_USE_THIS = "Use this spreadsheet to describe your datasets. For each dataset (i.e. each file), ensure all the labeled columns are specified and correct." 
+INSTRUCTION_EXTRA_COLUMNS = "Additional columns can be added if you wish to specify additional metadata." # mention second tab when implemented +INSTRUCTION_ONCE_COMPLETE_DATASETS = ( + "Once data entry is complete, drop this file back into Galaxy to finish creating datasets for your inputs." +) +INSTRUCTION_ONCE_COMPLETE_COLLECTION = ( + "Once data entry is complete, drop this file back into Galaxy to finish creating a collection for your inputs." +) +INSTRUCTION_ONCE_COMPLETE_COLLECTIONS = ( + "Once data entry is complete, drop this file back into Galaxy to finish creating collections for your inputs." +) +INSTRUCTIONS_BY_TYPE: Dict[FetchWorkbookType, List[str]] = { + "datasets": [ + INSTRUCTION_USE_THIS, + INSTRUCTION_EXTRA_COLUMNS, + INSTRUCTION_ONCE_COMPLETE_DATASETS, + ], + "collection": [ + INSTRUCTION_USE_THIS, + INSTRUCTION_EXTRA_COLUMNS, + INSTRUCTION_ONCE_COMPLETE_COLLECTION, + ], + "collections": [ + INSTRUCTION_USE_THIS, + INSTRUCTION_EXTRA_COLUMNS, + INSTRUCTION_ONCE_COMPLETE_COLLECTIONS, + ], +} + +EXCEPTION_TOO_MANY_URI_COLUMNS = "Too many URI/URL columns in the supplied workbook." +EXCEPTION_NO_URIS_FOUND = "Failed to find any URI or URL-like column in the supplied workbook." + + +@dataclass +class GenerateFetchWorkbookRequest: + type: FetchWorkbookType = DEFAULT_FETCH_WORKBOOK_TYPE + collection_type: FetchWorkbookCollectionType = DEFAULT_FETCH_WORKBOOK_COLLECTION_TYPE + title: str = DEFAULT_WORKBOOK_TITLE + + +WorkbookContentField: Base64StringT = Field( + ..., + title="Workbook Content (Base 64 encoded)", + description="The workbook content (the contents of the xlsx file) that has been base64 encoded.", +) + + +class ParseFetchWorkbook(BaseModel): + content: Base64StringT = WorkbookContentField + + +@dataclass +class HeaderColumn: + type: RuleBuilderMappingTargetKey + title: str # user facing + # e.g.
for paired data will have two columns of URIs, record types maybe have any number + # and after dataset hash may have multiples of those also + type_index: int + + @property + def width(self): + if self.type in ["url", "url_deferred", "ftp_path"]: + return 50 + else: + return 20 + + @property + def name(self): + if self.type_index == 0: + return self.type + else: + return f"{self.type}_{self.type_index}" + + @property + def help(self) -> str: + column_target = _column_header_to_column_target(self) + return column_target.help if column_target.help else "" + + @property + def parsed_column(self) -> "ParsedColumn": + return ParsedColumn( + type=self.type, + type_index=self.type_index, + title=self.title, + ) + + +def generate(request: GenerateFetchWorkbookRequest) -> Workbook: + workbook = Workbook() + worksheet = workbook.active + worksheet.title = request.title + header_columns = _request_to_columns(request) + + header_titles = [h.title for h in header_columns] + worksheet.append(header_titles) + make_headers_bold(worksheet, header_titles) + freeze_header_row(worksheet) + for i, column in enumerate(header_columns): + set_column_width(worksheet, i, column.width) + + uri_data_validator = uri_data_validation("A") + add_column_validation("A", uri_data_validator, worksheet) + + columns_for_help = [HasHelp(c.title, c.help) for c in header_columns] + help_configuration = HelpConfiguration( + instructions=INSTRUCTIONS_BY_TYPE[request.type], + columns=columns_for_help, + text_width=50, + column_width=50, + ) + add_instructions_to_sheet( + worksheet, + help_configuration, + ) + + return workbook + + +ParsedRow = Dict[str, str] +ParsedRows = List[ParsedRow] + + +class ParsedColumn(BaseModel): + type: RuleBuilderMappingTargetKey + type_index: int + title: str + + +class ParseLogEntry(BaseModel): + message: str + + +class SplitUpPairedDataLogEntry(ParseLogEntry): + message: str + old_forward_column: ParsedColumn + old_reverse_column: ParsedColumn + new_paired_status_column: int + + +class InferredCollectionTypeLogEntry(ParseLogEntry): + message: str + from_columns: List[ParsedColumn] + + +ParseLog = List[ParseLogEntry] + + +class BaseParsedFetchWorkbook(BaseModel): + rows: ParsedRows + columns: List[ParsedColumn] + workbook_type: FetchWorkbookType + parse_log: ParseLog + + +class ParsedFetchWorkbookForDatasets(BaseParsedFetchWorkbook): + workbook_type: FetchWorkbookType = "datasets" + + +class ParsedFetchWorkbookForCollections(BaseParsedFetchWorkbook): + workbook_type: FetchWorkbookType = "collection" + collection_type: FetchWorkbookCollectionType + + +ParsedFetchWorkbook = Union[ParsedFetchWorkbookForDatasets, ParsedFetchWorkbookForCollections] + + +def parse(payload: ParseFetchWorkbook) -> ParsedFetchWorkbook: + parse_log: ParseLog = [] + workbook = load_workbook_from_base64(payload.content) + column_headers = _read_column_headers(workbook.active) + _validate_parsed_column_headers(column_headers) + raw_rows = _load_row_data(workbook, payload) + + # the rule builder does require splitting the paired data in this way but it might + # be worth it to do it with an "initial rule" instead to demo how you'd do it + # with actual rule builder rules? Not sure. 
+    rows, column_headers, split_data_log_entry = _split_paired_data_if_needed(raw_rows, column_headers)
+    if split_data_log_entry:
+        parse_log.append(split_data_log_entry)
+    columns = [ParsedColumn(title=c.title, type=c.type, type_index=c.type_index) for c in column_headers]
+    if _is_fetch_workbook_for_collections(column_headers):
+        collection_type, log_entry = _infer_fetch_workbook_collection_type(column_headers)
+        parse_log.append(log_entry)
+        assert collection_type in ["list", "list:paired", "list:list", "list:list:paired", "list:paired_or_unpaired"]
+        return ParsedFetchWorkbookForCollections(
+            collection_type=collection_type, rows=rows, columns=columns, parse_log=parse_log
+        )
+    else:
+        return ParsedFetchWorkbookForDatasets(rows=rows, columns=columns, parse_log=parse_log)
+
+
+def _validate_parsed_column_headers(column_headers: List[HeaderColumn]) -> None:
+    uri_like_columns = _uri_like_columns(column_headers)
+    if len(uri_like_columns) > 2:
+        raise RequestParameterInvalidException(
+            f"{EXCEPTION_TOO_MANY_URI_COLUMNS} Relevant headers are {[c.title for c in uri_like_columns]}"
+        )
+    if len(uri_like_columns) == 0:
+        raise RequestParameterInvalidException(EXCEPTION_NO_URIS_FOUND)
+
+
+def _request_to_columns(request: GenerateFetchWorkbookRequest) -> List[HeaderColumn]:
+    if request.type == "datasets":
+        return [
+            HeaderColumn("url", "URI", 0),
+            HeaderColumn("name", "Name", 0),
+        ]
+    else:
+        if request.collection_type == "list":
+            header_columns = [
+                HeaderColumn("url", "URI", 0),
+                HeaderColumn("list_identifiers", "List Identifier", 0),
+            ]
+        elif request.collection_type == "list:paired":
+            header_columns = [
+                HeaderColumn("url", "URI 1 (Forward)", 0),
+                HeaderColumn("url", "URI 2 (Reverse)", 1),
+                HeaderColumn("list_identifiers", "List Identifier", 0),
+            ]
+        elif request.collection_type == "list:list":
+            header_columns = [
+                HeaderColumn("url", "URI", 0),
+                HeaderColumn("list_identifiers", "Outer List Identifier", 0),
+                HeaderColumn("list_identifiers", "Inner List Identifier", 1),
+            ]
+        elif request.collection_type == "list:list:paired":
+            header_columns = [
+                HeaderColumn("url", "URI 1 (Forward)", 0),
+                HeaderColumn("url", "URI 2 (Reverse)", 1),
+                HeaderColumn("list_identifiers", "Outer List Identifier", 0),
+                HeaderColumn("list_identifiers", "Inner List Identifier", 1),
+            ]
+        elif request.collection_type == "list:paired_or_unpaired":
+            header_columns = [
+                HeaderColumn("url", "URI 1 (Forward)", 0),
+                HeaderColumn("url", "URI 2 (Optional/Reverse)", 1),
+                HeaderColumn("list_identifiers", "List Identifier", 0),
+            ]
+        else:
+            raise NotImplementedError()
+        if request.type == "collections":
+            header_columns.append(
+                HeaderColumn(
+                    type="collection_name",
+                    title="Collection Name",
+                    type_index=0,
+                )
+            )
+        return header_columns
+
+
+def _load_row_data(workbook: Workbook, payload: ParseFetchWorkbook) -> ParsedRows:
+    sheet = workbook.active  # Get the first sheet
+
+    rows: ParsedRows = []
+
+    column_headers = _read_column_headers(sheet)
+    columns_to_read = len(column_headers)
+
+    for row_index, row in enumerate(sheet.iter_rows(max_col=columns_to_read, values_only=True)):
+        if row_index == 0:  # skip column headers
+            continue
+        if not row[0]:
+            break
+        parsed_row: ParsedRow = {}
+        for value, column in zip(row, column_headers):
+            parsed_row[column.name] = value
+        rows.append(parsed_row)
+    return rows
+
+
+def _split_paired_data_if_needed(
+    rows: ParsedRows, column_headers: List[HeaderColumn]
+) -> Tuple[ParsedRows, List[HeaderColumn], Optional[SplitUpPairedDataLogEntry]]:
+    uri_like_columns = _uri_like_columns(column_headers)
+    if len(uri_like_columns) != 2:
+        return rows, column_headers, None
+
+    url_column_0 = uri_like_columns[0]
+    url_column_1 = uri_like_columns[1]
+
+    # Split the data up
+    split_rows: ParsedRows = []
+    for row in rows:
+        url_0 = row.pop(url_column_0.name)
+        url_1 = row.pop(url_column_1.name)
+        row_0 = row.copy()
+        row_1 = row.copy()
+        row_0[url_column_0.name] = url_0
+        row_1[url_column_0.name] = url_1
+        row_0["paired_identifier"] = "1"
+        row_1["paired_identifier"] = "2"
+        split_rows.append(row_0)
+        split_rows.append(row_1)
+
+    # Adjust the columns accordingly
+    column_headers.remove(url_column_1)
+    column_headers.append(
+        HeaderColumn(
+            type="paired_identifier",
+            title="Paired Identifier",
+            type_index=0,
+        )
+    )
+    split_log_entry = SplitUpPairedDataLogEntry(
+        message="Merged the paired data URI columns into a single column and added a paired identifier column.",
+        old_forward_column=url_column_0.parsed_column,
+        old_reverse_column=url_column_1.parsed_column,
+        new_paired_status_column=len(column_headers) - 1,
+    )
+    return split_rows, column_headers, split_log_entry
+
+
+def _read_column_header_titles(worksheet: Worksheet) -> List[str]:
+    # TODO: capture parsing information messages, there is a lot less structure here than in the
+    # sample sheet code where we know the expected columns... I think.
+    index = 1
+    titles: List[str] = []
+    while True:
+        value = worksheet.cell(1, index).value
+        if not value:
+            break
+        titles.append(value)
+        index += 1
+    return titles
+
+
+def _read_column_headers(worksheet: Worksheet) -> List[HeaderColumn]:
+    column_titles = _read_column_header_titles(worksheet)
+    return _column_titles_to_headers(column_titles)
+
+
+COLUMN_TITLE_PREFIXES: Dict[str, RuleBuilderMappingTargetKey] = {
+    "name": "name",
+    "listname": "collection_name",
+    "collectionname": "collection_name",
+    "uri": "url",
+    "url": "url",
+    "urldeferred": "url_deferred",
+    "deferredurl": "url_deferred",
+    "genome": "dbkey",
+    "dbkey": "dbkey",
+    "filetype": "file_type",
+    "info": "info",
+    "tag": "tags",
+    "grouptag": "group_tags",
+    "nametag": "name_tag",
+    "listidentifier": "list_identifiers",
+    "pairedidentifier": "paired_identifier",
+}
+
+
+def _column_titles_to_headers(column_titles: List[str]) -> List[HeaderColumn]:
+    headers: List[HeaderColumn] = []
+    headers_of_type_seen: Dict[RuleBuilderMappingTargetKey, int] = {}
+
+    for column_title in column_titles:
+        normalized_title = (
+            column_title.lower()
+            .replace(" ", "")
+            .replace("optional", "")
+            .replace("(", "")
+            .replace(")", "")
+        )
+        if normalized_title not in COLUMN_TITLE_PREFIXES:
+            for key in COLUMN_TITLE_PREFIXES.keys():
+                if normalized_title.startswith(key):
+                    normalized_title = key
+                    break
+                elif normalized_title.endswith(key):
+                    normalized_title = key
+                    break
+
+        if normalized_title not in COLUMN_TITLE_PREFIXES:
+            # stop reading headers at the first unrecognized column title
+            break
+
+        column_type: RuleBuilderMappingTargetKey = COLUMN_TITLE_PREFIXES[normalized_title]
+        if column_type in headers_of_type_seen:
+            type_index = headers_of_type_seen[column_type]
+            headers_of_type_seen[column_type] += 1
+        else:
+            type_index = 0
+            headers_of_type_seen[column_type] = 1
+        header_column = HeaderColumn(
+            type=column_type,
+            type_index=type_index,
+            title=column_title,
+        )
+        if header_column.type == "paired_identifier" and _implied_paired_or_unpaired_column_header(header_column):
+            header_column.type = "paired_or_unpaired_identifier"
+        headers.append(header_column)
+
+    return headers
+
+
+def _uri_like_columns(column_headers: List[HeaderColumn]) -> List[HeaderColumn]:
+    return [c for c in column_headers if c.type == "url" or c.type == "url_deferred"]
+
+
+def _infer_fetch_workbook_collection_type(
+    column_headers: List[HeaderColumn],
+) -> Tuple[str, InferredCollectionTypeLogEntry]:
+    paired_identifier_columns = [c for c in column_headers if c.type == "paired_identifier"]
+    paired_or_unpaired_identifier_columns = [c for c in column_headers if c.type == "paired_or_unpaired_identifier"]
+    any_paired = len(paired_identifier_columns) > 0
+    uri_columns = _uri_like_columns(column_headers)
+    num_uris = len(uri_columns)
+
+    inference_on_columns: List[ParsedColumn] = []
+
+    list_type: str = ""
+    for column_header in column_headers:
+        if column_header.type == "list_identifiers":
+            inference_on_columns.append(column_header.parsed_column)
+            if list_type:
+                list_type = f"list:{list_type}"
+            else:
+                list_type = "list"
+    if any_paired or num_uris == 2:
+        if num_uris == 2:
+            inference_on_columns.append(uri_columns[0].parsed_column)
+            inference_on_columns.append(uri_columns[1].parsed_column)
+            if _implied_paired_or_unpaired_column_header(uri_columns[1]):
+                collection_type = f"{list_type}:paired_or_unpaired"
+            else:
+                collection_type = f"{list_type}:paired"
+        else:
+            paired_identifier_column = paired_identifier_columns[0]
+            inference_on_columns.append(paired_identifier_column.parsed_column)
+            if _implied_paired_or_unpaired_column_header(paired_identifier_column):
+                collection_type = f"{list_type}:paired_or_unpaired"
+            else:
+                collection_type = f"{list_type}:paired"
+    elif len(paired_or_unpaired_identifier_columns) > 0:
+        inference_on_columns.append(paired_or_unpaired_identifier_columns[0].parsed_column)
+        collection_type = f"{list_type}:paired_or_unpaired"
+    else:
+        collection_type = list_type
+    return collection_type, InferredCollectionTypeLogEntry(
+        message="Inferred collection type from column headers.",
+        from_columns=inference_on_columns,
+    )
+
+
+def _implied_paired_or_unpaired_column_header(column_header: HeaderColumn) -> bool:
+    # URI 2 - Reverse (Optional), Optional URI 2, Optional Paired Identifier all
+    # imply paired_or_unpaired lists using this logic.
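+    # e.g. (hypothetical titles): "URI 2 (Optional/Reverse)" implies a
+    # list:paired_or_unpaired target, while a plain "URI 2 (Reverse)" keeps the
+    # stricter paired interpretation - only the word "optional" relaxes it.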
+ return "optional" in column_header.title.lower() + + +def _is_fetch_workbook_for_collections(column_headers: List[HeaderColumn]) -> bool: + return _infer_fetch_workbook_collection_type(column_headers)[0] != "" + + +def _column_header_to_column_target(column_header: HeaderColumn) -> ColumnTarget: + return target_model_by_type(column_header.type) diff --git a/lib/galaxy/webapps/galaxy/api/common.py b/lib/galaxy/webapps/galaxy/api/common.py index 44d68626fc71..228d68871ec7 100644 --- a/lib/galaxy/webapps/galaxy/api/common.py +++ b/lib/galaxy/webapps/galaxy/api/common.py @@ -1,5 +1,6 @@ """This module contains utility functions shared across the api package.""" +from io import BytesIO from typing import ( Any, List, @@ -13,6 +14,7 @@ Query, Request, ) +from starlette.responses import StreamingResponse from typing_extensions import Annotated from galaxy.schema import ( @@ -294,3 +296,12 @@ def parse_elements( return elements return parse_elements + + +def serve_workbook(content: BytesIO, filename: Optional[str]) -> StreamingResponse: + filename = filename or "galaxy_sample_sheet_workbook.xlsx" + return StreamingResponse( + content, + media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + headers={"Content-Disposition": f"attachment; filename={filename}"}, + ) diff --git a/lib/galaxy/webapps/galaxy/api/tools.py b/lib/galaxy/webapps/galaxy/api/tools.py index 17d2e6cadab6..e3f605d91085 100644 --- a/lib/galaxy/webapps/galaxy/api/tools.py +++ b/lib/galaxy/webapps/galaxy/api/tools.py @@ -16,12 +16,14 @@ from fastapi import ( Body, Depends, + Query, Request, Response, UploadFile, ) from fastapi.responses import FileResponse from starlette.datastructures import UploadFile as StarletteUploadFile +from starlette.responses import StreamingResponse from galaxy import ( exceptions, @@ -33,6 +35,7 @@ from galaxy.managers.context import ProvidesHistoryContext from galaxy.managers.hdas import HDAManager from galaxy.managers.histories import HistoryManager +from galaxy.model.dataset_collections.workbook_util import workbook_to_bytes from galaxy.schema.fetch_data import ( FetchDataFormPayload, FetchDataPayload, @@ -40,6 +43,15 @@ from galaxy.tool_util.verify import ToolTestDescriptionDict from galaxy.tool_util_models import UserToolSource from galaxy.tools.evaluation import global_tool_errors +from galaxy.tools.fetch.workbooks import ( + FetchWorkbookCollectionType, + FetchWorkbookType, + generate, + GenerateFetchWorkbookRequest, + parse, + ParsedFetchWorkbook, + ParseFetchWorkbook, +) from galaxy.util.hash_util import ( HashFunctionNameEnum, memory_bound_hexdigest, @@ -53,6 +65,7 @@ ) from galaxy.webapps.base.controller import UsesVisualizationMixin from galaxy.webapps.base.webapp import GalaxyWebTransaction +from galaxy.webapps.galaxy.api.common import serve_workbook from galaxy.webapps.galaxy.services.tools import ToolsService from . 
import (
     APIContentTypeRoute,
@@ -86,6 +99,22 @@ class PNGIconResponse(FileResponse):
     media_type = "image/png"
 
 
+FetchWorkbookTypeQueryParam: FetchWorkbookType = Query(
+    default="datasets",
+    title="Workbook Type",
+    description="Generate a workbook for simple datasets or a collection.",
+)
+FetchWorkbookCollectionTypeQueryParam: FetchWorkbookCollectionType = Query(
+    default="list",
+    title="Collection Type",
+    description="Generate workbook for specified collection type (not all collection types are supported)",
+)
+FetchWorkbookFilenameQueryParam: Optional[str] = Query(
+    None,
+    description="Filename of the workbook download to generate",
+)
+
+
 router = Router(tags=["tools"])
 
 FetchDataForm = as_form(FetchDataFormPayload)
@@ -123,6 +152,39 @@ def fetch_form(
     ):
         return self.service.create_fetch(trans, payload, files)
 
+    @router.get(
+        "/api/tools/fetch/workbook",
+        summary="Generate a template workbook to use with the activity builder UI",
+        response_class=StreamingResponse,
+        operation_id="tools__fetch_workbook_download",
+    )
+    def fetch_workbook(
+        self,
+        trans: ProvidesHistoryContext = DependsOnTrans,
+        type: FetchWorkbookType = FetchWorkbookTypeQueryParam,
+        collection_type: FetchWorkbookCollectionType = FetchWorkbookCollectionTypeQueryParam,
+        filename: Optional[str] = FetchWorkbookFilenameQueryParam,
+    ):
+        generate_request = GenerateFetchWorkbookRequest(
+            type=type,
+            collection_type=collection_type,
+        )
+        workbook = generate(generate_request)
+        contents = workbook_to_bytes(workbook)
+        return serve_workbook(contents, filename)
+
+    @router.post(
+        "/api/tools/fetch/workbook/parse",
+        summary="Parse a fetch workbook filled in by the user for the activity builder UI",
+        operation_id="tools__fetch_workbook_parse",
+    )
+    def parse_workbook(
+        self,
+        trans: ProvidesHistoryContext = DependsOnTrans,
+        payload: ParseFetchWorkbook = Body(...),
+    ) -> ParsedFetchWorkbook:
+        return parse(payload)
+
     @router.get(
         "/api/tools/{tool_id}/icon",
         summary="Get the icon image associated with a tool",
diff --git a/lib/galaxy_test/api/test_tools.py b/lib/galaxy_test/api/test_tools.py
index 9dde25e9ded0..00a8168190d4 100644
--- a/lib/galaxy_test/api/test_tools.py
+++ b/lib/galaxy_test/api/test_tools.py
@@ -23,6 +23,7 @@
 from galaxy.util.unittest_utils import skip_if_github_down
 from galaxy_test.base import rules_test_data
 from galaxy_test.base.api_asserts import (
+    assert_file_looks_like_xlsx,
     assert_has_keys,
     assert_status_code_is,
 )
@@ -398,6 +399,10 @@ def test_model_attributes_sanitization(self):
         )
         assert sanitized_address.strip() == cool_name_without_quote
 
+    def test_fetch_workbook_generate(self):
+        workbook_path = self.dataset_populator.download_fetch_workbook()
+        assert_file_looks_like_xlsx(workbook_path)
+
     @skip_without_tool("composite_output")
     def test_test_data_filepath_security(self):
         test_data_response = self._get("tools/composite_output/test_data_path?filename=../CONTRIBUTORS.md", admin=True)
diff --git a/lib/galaxy_test/base/api_asserts.py b/lib/galaxy_test/base/api_asserts.py
index ec51a38844f2..fcc3c5c83444 100644
--- a/lib/galaxy_test/base/api_asserts.py
+++ b/lib/galaxy_test/base/api_asserts.py
@@ -113,3 +113,11 @@ def _as_dict(response: Union[Response, dict]) -> Dict[str, Any]:
 
 
 assert_has_key = assert_has_keys
+
+
+def assert_file_looks_like_xlsx(xlsx_file: str):
+    # Check the file header
+    with open(xlsx_file, "rb") as file:
+        header = file.read(4)
+    # The ZIP file signature is 0x50 0x4B 0x03 0x04
+    assert header == b"\x50\x4b\x03\x04"
diff --git a/lib/galaxy_test/base/populators.py b/lib/galaxy_test/base/populators.py
index 804338757df3..4451693dfe77 100644
--- a/lib/galaxy_test/base/populators.py
+++ b/lib/galaxy_test/base/populators.py
@@ -420,8 +420,11 @@ def _get_to_tempfile(self, route, suffix=None, **kwd) -> str:
         """Perform a _get and store the result in a tempfile."""
         get_response = self._get(route, **kwd)
         get_response.raise_for_status()
+        return self._get_response_to_tempfile(get_response, suffix=suffix)
+
+    def _get_response_to_tempfile(self, response, suffix=None) -> str:
         temp_file = tempfile.NamedTemporaryFile("wb", delete=False, suffix=suffix)
-        temp_file.write(get_response.content)
+        temp_file.write(response.content)
         temp_file.flush()
         return temp_file.name
 
@@ -1108,6 +1111,12 @@ def get_remote_files(self, target: str = "ftp") -> dict:
         response.raise_for_status()
         return response.json()
 
+    def download_fetch_workbook(self) -> str:
+        url = "tools/fetch/workbook"
+        download_response = self._get(url)
+        api_asserts.assert_status_code_is_ok(download_response)
+        return self._get_response_to_tempfile(download_response)
+
     def run_tool_payload(self, tool_id: Optional[str], inputs: dict, history_id: str, **kwds) -> dict:
         # Remove files_%d|file_data parameters from inputs dict and attach
         # as __files dictionary.
diff --git a/packages/data/MANIFEST.in b/packages/data/MANIFEST.in
index e08e1a3a8206..7bf27d3257fa 100644
--- a/packages/data/MANIFEST.in
+++ b/packages/data/MANIFEST.in
@@ -3,6 +3,7 @@ include galaxy/datatypes/set_metadata_tool.xml
 include galaxy/datatypes/converters/*.xml
 graft galaxy/datatypes/display_applications/configs/
 graft galaxy/datatypes/display_applications/xsd/
+include galaxy/model/dataset_collections/rule_targets.yml
 include galaxy/model/migrations/alembic.ini
 recursive-include galaxy/model/migrations/alembic/ *.py
 include galaxy/model/migrations/README.md
diff --git a/packages/data/setup.cfg b/packages/data/setup.cfg
index e9b66834563f..8559e47fc663 100644
--- a/packages/data/setup.cfg
+++ b/packages/data/setup.cfg
@@ -49,6 +49,7 @@ install_requires =
     msal
     mrcfile
     numpy
+    openpyxl>=3.1.5
     parsley
     pycryptodome
     pydantic[email]>=2.7.4
diff --git a/pyproject.toml b/pyproject.toml
index abbcba232d7e..bcb7aa5b8684 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -62,6 +62,7 @@ dependencies = [
     "nodeenv",
     "numpy>=1.26.0",  # Python 3.12 support
     "numpy>=2.1.0 ; python_version>='3.10'",  # Python 3.13 support
+    "openpyxl>=3.1.5",  # Minimum version tested with
     "packaging",
     "paramiko!=2.9.0,!=2.9.1",  # https://github.com/paramiko/paramiko/issues/1961
     "Parsley",
diff --git a/test/unit/app/tools/test_fetch_workbooks.py b/test/unit/app/tools/test_fetch_workbooks.py
new file mode 100644
index 000000000000..1d4eff513d34
--- /dev/null
+++ b/test/unit/app/tools/test_fetch_workbooks.py
@@ -0,0 +1,256 @@
+import base64
+import os
+
+import pytest
+
+from galaxy.exceptions import RequestParameterInvalidException
+from galaxy.model.dataset_collections.rule_target_models import target_models
+from galaxy.tools.fetch.workbooks import (
+    _column_titles_to_headers,
+    _infer_fetch_workbook_collection_type,
+    _read_column_header_titles,
+    _validate_parsed_column_headers,
+    DEFAULT_WORKBOOK_TITLE,
+    EXCEPTION_NO_URIS_FOUND,
+    EXCEPTION_TOO_MANY_URI_COLUMNS,
+    generate,
+    GenerateFetchWorkbookRequest,
+    parse,
+    ParseFetchWorkbook,
+)
+from galaxy.util.resources import resource_path
+
+WRITE_TEST_WORKBOOKS = False
+
+
+def test_fetch_datasets_workbook():
+    request = GenerateFetchWorkbookRequest()
+    workbook = generate(request)
+    assert workbook
+    worksheet = workbook.active
+    assert worksheet.title == DEFAULT_WORKBOOK_TITLE
+    assert worksheet.cell(1, 1).value == "URI"
+
+    header_titles = _read_column_header_titles(worksheet)
+    assert header_titles == ["URI", "Name"]
+
+    if WRITE_TEST_WORKBOOKS:
+        path = "~/fetch_workbook.xlsx"
+        expanded_path = os.path.expanduser(path)
+        workbook.save(expanded_path)
+
+
+def test_fetch_list_workbook():
+    request = GenerateFetchWorkbookRequest(
+        type="collection",
+        collection_type="list",
+    )
+    workbook = generate(request)
+    assert workbook
+    worksheet = workbook.active
+
+    header_titles = _read_column_header_titles(worksheet)
+    assert header_titles == ["URI", "List Identifier"]
+
+
+def test_fetch_multiple_lists_workbook():
+    request = GenerateFetchWorkbookRequest(
+        type="collections",
+        collection_type="list",
+    )
+    workbook = generate(request)
+    assert workbook
+    worksheet = workbook.active
+
+    header_titles = _read_column_header_titles(worksheet)
+    assert header_titles == ["URI", "List Identifier", "Collection Name"]
+
+
+def test_fetch_list_paired_workbook():
+    request = GenerateFetchWorkbookRequest(
+        type="collection",
+        collection_type="list:paired",
+    )
+    workbook = generate(request)
+    assert workbook
+    worksheet = workbook.active
+
+    header_titles = _read_column_header_titles(worksheet)
+    assert header_titles == ["URI 1 (Forward)", "URI 2 (Reverse)", "List Identifier"]
+
+    if WRITE_TEST_WORKBOOKS:
+        path = "~/fetch_workbook_paired.xlsx"
+        expanded_path = os.path.expanduser(path)
+        workbook.save(expanded_path)
+
+
+def test_fetch_list_paired_or_unpaired_workbook():
+    request = GenerateFetchWorkbookRequest(
+        type="collection",
+        collection_type="list:paired_or_unpaired",
+    )
+    workbook = generate(request)
+    assert workbook
+    worksheet = workbook.active
+
+    header_titles = _read_column_header_titles(worksheet)
+    assert header_titles == ["URI 1 (Forward)", "URI 2 (Optional/Reverse)", "List Identifier"]
+
+
+def test_parse_datasets():
+    content = unittest_file_to_base64("fetch_workbook.xlsx")
+    parse_request = ParseFetchWorkbook(
+        content=content,
+    )
+    parsed = parse(parse_request)
+    assert len(parsed.rows) == 1
+    row0 = parsed.rows[0]
+    assert row0["url"] == "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/4.bed"
+    assert row0["name"] == "4.bed"
+
+    assert len(parsed.columns) == 2
+
+
+def test_parse_paired_list():
+    # workbook has URI 1 and URI 2 columns - make sure they are broken out and have a paired_identifier column
+    # for the rule builder.
+    content = unittest_file_to_base64("fetch_workbook_paired.xlsx")
+    parse_request = ParseFetchWorkbook(
+        content=content,
+    )
+    parsed = parse(parse_request)
+    assert len(parsed.rows) == 2
+    row0 = parsed.rows[0]
+    assert row0["url"] == "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/1.bed"
+    assert row0["list_identifiers"] == "sample1"
+    assert row0["paired_identifier"] == "1"
+
+    row1 = parsed.rows[1]
+    assert row1["url"] == "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/4.bed"
+    assert row1["list_identifiers"] == "sample1"
+    assert row1["paired_identifier"] == "2"
+
+    assert len(parsed.columns) == 3
+
+
+def test_read_column_headers_from_titles():
+    # datasets...
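+    # title matching is intentionally forgiving; as a sketch of the normalization
+    # (not an exhaustive contract), "URI", "uri", and "URI (Optional)" all reduce
+    # to the normalized key "uri" and therefore map to the "url" target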
+    column_headers = _column_titles_to_headers(["URI", "Name", "Genome"])
+    assert len(column_headers) == 3
+    assert column_headers[0].type == "url"
+    assert column_headers[0].title == "URI"
+    assert column_headers[1].type == "name"
+    assert column_headers[1].title == "Name"
+    assert column_headers[2].type == "dbkey"
+    assert column_headers[2].title == "Genome"
+
+    # simple list...
+    column_headers = _column_titles_to_headers(["URI", "List Identifier"])
+    assert len(column_headers) == 2
+    assert column_headers[0].type == "url"
+    assert column_headers[0].title == "URI"
+    assert column_headers[1].type == "list_identifiers"
+    assert column_headers[1].title == "List Identifier"
+
+    # paired list with two URIs per row...
+    column_headers = _column_titles_to_headers(["URI 1 (Forward)", "URI 2 (Reverse)", "List Identifier"])
+    assert len(column_headers) == 3
+    assert column_headers[0].type == "url"
+    assert column_headers[0].title == "URI 1 (Forward)"
+    assert column_headers[0].type_index == 0
+
+    assert column_headers[1].type == "url"
+    assert column_headers[1].title == "URI 2 (Reverse)"
+    assert column_headers[1].type_index == 1
+
+    assert column_headers[2].type == "list_identifiers"
+    assert column_headers[2].title == "List Identifier"
+
+    # paired list with the paired identifier as a column...
+    column_headers = _column_titles_to_headers(["URI", "List Identifier", "Paired Identifier"])
+    assert len(column_headers) == 3
+    assert column_headers[0].type == "url"
+    assert column_headers[0].title == "URI"
+    assert column_headers[0].type_index == 0
+
+    assert column_headers[2].type == "paired_identifier"
+    assert column_headers[2].title == "Paired Identifier"
+    assert column_headers[2].type_index == 0
+
+    # nested list
+    column_headers = _column_titles_to_headers(["URI", "Outer List Identifier", "Inner List Identifier"])
+
+    assert len(column_headers) == 3
+    assert column_headers[0].type == "url"
+    assert column_headers[0].title == "URI"
+    assert column_headers[0].type_index == 0
+
+    assert column_headers[1].type == "list_identifiers"
+    assert column_headers[1].title == "Outer List Identifier"
+    assert column_headers[1].type_index == 0
+
+    assert column_headers[2].type == "list_identifiers"
+    assert column_headers[2].title == "Inner List Identifier"
+    assert column_headers[2].type_index == 1
+
+
+def test_infer_fetch_workbook_collection_type():
+    column_headers = _column_titles_to_headers(["URI", "List Identifier", "Genome"])
+    collection_type = _infer_fetch_workbook_collection_type(column_headers)[0]
+    assert collection_type == "list"
+
+    column_headers = _column_titles_to_headers(["URI", "List Identifier 1", "List Identifier 2", "Genome"])
+    collection_type = _infer_fetch_workbook_collection_type(column_headers)[0]
+    assert collection_type == "list:list"
+
+    column_headers = _column_titles_to_headers(["URI", "List Identifier", "Paired Identifier", "Genome"])
+    collection_type = _infer_fetch_workbook_collection_type(column_headers)[0]
+    assert collection_type == "list:paired"
+
+    # probably more usable - two URI style list:paired
+    column_headers = _column_titles_to_headers(["URI 1", "URI 2", "List Identifier", "Genome"])
+    collection_type = _infer_fetch_workbook_collection_type(column_headers)[0]
+    assert collection_type == "list:paired"
+
+    column_headers = _column_titles_to_headers(
+        ["URI", "List Identifier 1", "List Identifier 2", "Paired Identifier", "Genome"]
+    )
+    collection_type = _infer_fetch_workbook_collection_type(column_headers)[0]
+    assert collection_type == "list:list:paired"
+
+    column_headers = _column_titles_to_headers(["URI 1", "URI 2", "List Identifier 1", "List Identifier 2", "Genome"])
+    collection_type = _infer_fetch_workbook_collection_type(column_headers)[0]
+    assert collection_type == "list:list:paired"
+
+    # paired/unpaired sheets
+    column_headers = _column_titles_to_headers(["URI 1", "URI 2 (Optional)", "List Identifier", "Genome"])
+    collection_type = _infer_fetch_workbook_collection_type(column_headers)[0]
+    assert collection_type == "list:paired_or_unpaired"
+
+    # paired/unpaired sheets
+    column_headers = _column_titles_to_headers(["URI", "List Identifier", "Paired Identifier (Optional)", "Genome"])
+    collection_type = _infer_fetch_workbook_collection_type(column_headers)[0]
+    assert collection_type == "list:paired_or_unpaired"
+
+
+def test_column_target_model_parsing():
+    target_models()
+
+
+def test_validate_parsed_column_headers():
+    headers = _column_titles_to_headers(["URI 1", "URI 2", "URI 3"])
+    with pytest.raises(RequestParameterInvalidException) as exception_info:
+        _validate_parsed_column_headers(headers)
+    assert EXCEPTION_TOO_MANY_URI_COLUMNS in str(exception_info.value)
+
+    headers = _column_titles_to_headers(["Name", "Paired Indicator"])
+    with pytest.raises(RequestParameterInvalidException) as exception_info:
+        _validate_parsed_column_headers(headers)
+    assert EXCEPTION_NO_URIS_FOUND in str(exception_info.value)
+
+
+def unittest_file_to_base64(filename: str) -> str:
+    path = resource_path("galaxy.app_unittest_utils", filename)
+    example_as_bytes = path.read_bytes()
+    content_base64 = base64.b64encode(example_as_bytes).decode("utf-8")
+    return content_base64
diff --git a/test/unit/data/dataset_collections/test_workbook_util.py b/test/unit/data/dataset_collections/test_workbook_util.py
new file mode 100644
index 000000000000..042bcb497d32
--- /dev/null
+++ b/test/unit/data/dataset_collections/test_workbook_util.py
@@ -0,0 +1,24 @@
+import base64
+
+import pytest
+
+from galaxy.exceptions import RequestParameterInvalidException
+from galaxy.model.dataset_collections.workbook_util import (
+    index_to_excel_column,
+    load_workbook_from_base64,
+)
+
+
+def test_index_to_excel_column():
+    assert index_to_excel_column(0) == "A"
+    assert index_to_excel_column(25) == "Z"
+    assert index_to_excel_column(26) == "AA"
+    assert index_to_excel_column(700) == "ZY"
+    assert index_to_excel_column(701) == "ZZ"
+    assert index_to_excel_column(702) == "AAA"
+
+
+def test_loading_invalid_workbook():
+    base64_tabular = base64.b64encode(b"1\t2\t3\n").decode("utf-8")
+    with pytest.raises(RequestParameterInvalidException):
+        load_workbook_from_base64(base64_tabular)

From 76fbadd790cb77c4a9847a036ecdcd7582d9ffab Mon Sep 17 00:00:00 2001
From: John Chilton
Date: Wed, 14 May 2025 14:11:59 -0400
Subject: [PATCH 16/16] Rebuild schema for fetch workbook APIs.
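
This picks up the two new fetch workbook operations in the typed client
schema. As a rough sketch of how a consumer might call them (the
openapi-fetch wiring and the base64WorkbookContent value below are
assumptions for illustration, not part of this patch):

    import createClient from "openapi-fetch";
    import type { paths } from "./schema";

    const client = createClient<paths>({ baseUrl: "/" });

    // download an xlsx template for a list:paired collection
    const { response } = await client.GET("/api/tools/fetch/workbook", {
        params: { query: { type: "collection", collection_type: "list:paired" } },
        parseAs: "blob",
    });

    // round-trip the filled-in workbook back through the parse endpoint
    const { data: parsed } = await client.POST("/api/tools/fetch/workbook/parse", {
        body: { content: base64WorkbookContent },
    });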
---
 client/src/api/schema/schema.ts | 209 ++++++++++++++++++++++++++++++++
 1 file changed, 209 insertions(+)

diff --git a/client/src/api/schema/schema.ts b/client/src/api/schema/schema.ts
index a61861be3eaf..b3ed1d67b287 100644
--- a/client/src/api/schema/schema.ts
+++ b/client/src/api/schema/schema.ts
@@ -4609,6 +4609,40 @@ export interface paths {
         patch?: never;
         trace?: never;
     };
+    "/api/tools/fetch/workbook": {
+        parameters: {
+            query?: never;
+            header?: never;
+            path?: never;
+            cookie?: never;
+        };
+        /** Generate a template workbook to use with the activity builder UI */
+        get: operations["tools__fetch_workbook_download"];
+        put?: never;
+        post?: never;
+        delete?: never;
+        options?: never;
+        head?: never;
+        patch?: never;
+        trace?: never;
+    };
+    "/api/tools/fetch/workbook/parse": {
+        parameters: {
+            query?: never;
+            header?: never;
+            path?: never;
+            cookie?: never;
+        };
+        get?: never;
+        put?: never;
+        /** Parse a fetch workbook filled in by the user for the activity builder UI */
+        post: operations["tools__fetch_workbook_parse"];
+        delete?: never;
+        options?: never;
+        head?: never;
+        patch?: never;
+        trace?: never;
+    };
     "/api/tools/{tool_id}/icon": {
         parameters: {
             query?: never;
@@ -16872,6 +16906,88 @@ export interface components {
          * @default []
          */
         PageSummaryList: components["schemas"]["PageSummary"][];
+        /** ParseFetchWorkbook */
+        ParseFetchWorkbook: {
+            /**
+             * Workbook Content (Base 64 encoded)
+             * @description The workbook content (the contents of the xlsx file) that has been base64 encoded.
+             */
+            content: string;
+        };
+        /** ParseLogEntry */
+        ParseLogEntry: {
+            /** Message */
+            message: string;
+        };
+        /** ParsedColumn */
+        ParsedColumn: {
+            /** Title */
+            title: string;
+            /**
+             * Type
+             * @enum {string}
+             */
+            type:
+                | "list_identifiers"
+                | "paired_identifier"
+                | "paired_or_unpaired_identifier"
+                | "collection_name"
+                | "name_tag"
+                | "tags"
+                | "group_tags"
+                | "name"
+                | "dbkey"
+                | "hash_sha1"
+                | "hash_md5"
+                | "hash_sha256"
+                | "hash_sha512"
+                | "file_type"
+                | "url"
+                | "url_deferred"
+                | "info"
+                | "ftp_path";
+            /** Type Index */
+            type_index: number;
+        };
+        /** ParsedFetchWorkbookForCollections */
+        ParsedFetchWorkbookForCollections: {
+            /**
+             * Collection Type
+             * @enum {string}
+             */
+            collection_type: "list" | "list:paired" | "list:list" | "list:list:paired" | "list:paired_or_unpaired";
+            /** Columns */
+            columns: components["schemas"]["ParsedColumn"][];
+            /** Parse Log */
+            parse_log: components["schemas"]["ParseLogEntry"][];
+            /** Rows */
+            rows: {
+                [key: string]: string;
+            }[];
+            /**
+             * Workbook Type
+             * @default collection
+             * @enum {string}
+             */
+            workbook_type: "datasets" | "collection" | "collections";
+        };
+        /** ParsedFetchWorkbookForDatasets */
+        ParsedFetchWorkbookForDatasets: {
+            /** Columns */
+            columns: components["schemas"]["ParsedColumn"][];
+            /** Parse Log */
+            parse_log: components["schemas"]["ParseLogEntry"][];
+            /** Rows */
+            rows: {
+                [key: string]: string;
+            }[];
+            /**
+             * Workbook Type
+             * @default datasets
+             * @enum {string}
+             */
+            workbook_type: "datasets" | "collection" | "collections";
+        };
         /** PastedDataElement */
         PastedDataElement: {
             /** Md5 */
@@ -36370,6 +36486,99 @@ export interface operations {
             };
         };
     };
+    tools__fetch_workbook_download: {
+        parameters: {
+            query?: {
+                /** @description Generate a workbook for simple datasets or a collection. 
*/ + type?: "datasets" | "collection" | "collections"; + /** @description Generate workbook for specified collection type (not all collection types are supported) */ + collection_type?: "list" | "list:paired" | "list:list" | "list:list:paired" | "list:paired_or_unpaired"; + /** @description Filename of the workbook download to generate */ + filename?: string | null; + }; + header?: { + /** @description The user ID that will be used to effectively make this API call. Only admins and designated users can make API calls on behalf of other users. */ + "run-as"?: string | null; + }; + path?: never; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content?: never; + }; + /** @description Request Error */ + "4XX": { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["MessageExceptionModel"]; + }; + }; + /** @description Server Error */ + "5XX": { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["MessageExceptionModel"]; + }; + }; + }; + }; + tools__fetch_workbook_parse: { + parameters: { + query?: never; + header?: { + /** @description The user ID that will be used to effectively make this API call. Only admins and designated users can make API calls on behalf of other users. */ + "run-as"?: string | null; + }; + path?: never; + cookie?: never; + }; + requestBody: { + content: { + "application/json": components["schemas"]["ParseFetchWorkbook"]; + }; + }; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": + | components["schemas"]["ParsedFetchWorkbookForDatasets"] + | components["schemas"]["ParsedFetchWorkbookForCollections"]; + }; + }; + /** @description Request Error */ + "4XX": { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["MessageExceptionModel"]; + }; + }; + /** @description Server Error */ + "5XX": { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["MessageExceptionModel"]; + }; + }; + }; + }; get_icon_api_tools__tool_id__icon_get: { parameters: { query?: never;