Skip to content

Commit 2f6db4e

Browse files
authored
Merge pull request #809 from john-hill/PLFM-9340.2
Switch to TemplateSetValue with features for combining data from mult…
2 parents 71cf6f3 + 6989e8e commit 2f6db4e

File tree

2 files changed

+43
-15
lines changed

2 files changed

+43
-15
lines changed

src/main/resources/templates/repo/agent/grid/grid-agent-instructions.txt

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,11 +75,29 @@
7575
<update__json>{"updateBatch":[{"set":[{"concreteType":"org.sagebionetworks.repo.model.grid.update.LiteralSetValue","columnName":"status","value":"active"}],"filters":[{"concreteType":"org.sagebionetworks.repo.model.grid.query.CellValueFilter","columnName":"age","operator":"GREATER_THAN","value":18}]},{"set":[{"concreteType":"org.sagebionetworks.repo.model.grid.update.LiteralSetValue","columnName":"category","value":"senior"}],"filters":[{"concreteType":"org.sagebionetworks.repo.model.grid.query.CellValueFilter","columnName":"age","operator":"GREATER_THAN_OR_EQUALS","value":65}]},{"set":[{"concreteType":"org.sagebionetworks.repo.model.grid.update.LiteralSetValue","columnName":"discount","value":0.15}],"filters":[{"concreteType":"org.sagebionetworks.repo.model.grid.query.RowSelectionFilter","isSelected":true}]}]}</update__json>
7676
</update__example>
7777
<update__example>
78-
<description>Extract participant ID from fileName column using regex pattern 'participant-(\d+)' and set it to the participantId column. If the pattern doesn't match, set participantId to null.</description>
79-
<update__json>{"updateBatch":[{"set":[{"concreteType":"org.sagebionetworks.repo.model.grid.update.RegexExtractSetValue","columnName":"participantId","sourceColumnName":"fileName","pattern":"participant-(\\d+)","groupIndex":1,"onMatchFailure":"SET_NULL"}],"filters":[{"concreteType":"org.sagebionetworks.repo.model.grid.query.CellValueFilter","columnName":"fileName","operator":"IS_NOT_NULL"}]}]}</update__json>
78+
<description>Use a template to combine firstName and lastName columns into fullName with a space separator.</description>
79+
<update__json>{"updateBatch":[{"set":[{"concreteType":"org.sagebionetworks.repo.model.grid.update.TemplateSetValue","columnName":"fullName","sourceTemplate":"{firstName} {lastName}","onMatchFailure":"SET_NULL"}],"filters":[{"concreteType":"org.sagebionetworks.repo.model.grid.query.CellValueFilter","columnName":"firstName","operator":"IS_NOT_NULL"}]}]}</update__json>
80+
</update__example>
81+
<update__example>
82+
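<description>Extract the domain from the email column using the regex pattern '@(.+)$' (with no replacement given, the first capture group is used), treating missing email values as empty strings (which won't match the pattern, so the onMatchFailure behavior applies).</description>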
83+
<update__json>{"updateBatch":[{"set":[{"concreteType":"org.sagebionetworks.repo.model.grid.update.TemplateSetValue","columnName":"domain","sourceTemplate":"{email}","pattern":"@(.+)$","onMatchFailure":"SET_NULL","onMissingValue":"USE_EMPTY_STRING"}],"filters":[{"concreteType":"org.sagebionetworks.repo.model.grid.query.CellValueFilter","columnName":"email","operator":"IS_NOT_NULL"}]}]}</update__json>
84+
</update__example>
85+
<update__example>
86+
<description>Build full file path from bucket, folder, and filename columns; skip updating rows where any source column is missing.</description>
87+
<update__json>{"updateBatch":[{"set":[{"concreteType":"org.sagebionetworks.repo.model.grid.update.TemplateSetValue","columnName":"fullPath","sourceTemplate":"{bucket}/{folder}/{filename}","onMissingValue":"SKIP_UPDATE"}],"filters":[{"concreteType":"org.sagebionetworks.repo.model.grid.query.CellValueFilter","columnName":"fullPath","operator":"IS_NULL"}]}]}</update__json>
88+
</update__example>
89+
<update__example>
90+
<description>Reformat phone numbers from '(555) 123-4567' to '555-123-4567' using regex replacement: the parentheses are removed and the area code is joined to the rest of the number with a dash. Note: This pattern assumes consistent formatting; rows with different formats will trigger the onMatchFailure behavior.</description>
91+
<update__json>{"updateBatch":[{"set":[{"concreteType":"org.sagebionetworks.repo.model.grid.update.TemplateSetValue","columnName":"phone","sourceTemplate":"{phone}","pattern":"\\((\\d{3})\\)\\s*(\\d{3}-\\d{4})","replacement":"$1-$2","onMatchFailure":"SKIP_UPDATE"}],"filters":[{"concreteType":"org.sagebionetworks.repo.model.grid.query.CellValueFilter","columnName":"phone","operator":"LIKE","value":"(%"}]}]}</update__json>
8092
</update__example>
8193
</examples>
8294
</update__examples>
95+
<template_set_value>
96+
TemplateSetValue enables complex data transformations by:
97+
- Interpolating values from multiple columns using {columnName} placeholders
98+
- Applying optional regex patterns for extraction or transformation
99+
- Providing configurable fallback behaviors for missing data or pattern mismatches
100+
</template_set_value>
83101
</tools>
84102
</operational_context>
85103

src/main/resources/templates/repo/agent/grid/grid_agent_open_api.json

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -95,41 +95,51 @@
9595
},
9696
"description": "Results of a query against a grid session."
9797
},
98-
"org.sagebionetworks.repo.model.grid.update.RegexExtractSetValue": {
98+
"org.sagebionetworks.repo.model.grid.update.TemplateSetValue": {
9999
"type": "object",
100100
"properties": {
101101
"concreteType": {
102102
"type": "string",
103103
"description": "Required. Full name of the exact type used.",
104-
"enum": ["org.sagebionetworks.repo.model.grid.update.RegexExtractSetValue"]
104+
"enum": ["org.sagebionetworks.repo.model.grid.update.TemplateSetValue"]
105105
},
106106
"columnName": {
107107
"type": "string",
108108
"description": "Target column name to update."
109109
},
110-
"sourceColumnName": {
110+
"sourceTemplate": {
111111
"type": "string",
112-
"description": "The name of the column to read the value from (e.g., 'fileName')."
112+
"description": "Required. Template string with column placeholders in {columnName} format. Example: '{firstName} {lastName}' concatenates two columns with a space. Invalid column names in placeholders will trigger onMissingValue behavior."
113113
},
114114
"pattern": {
115115
"type": "string",
116-
"description": "The regular expression to apply. Must include at least one capture group (e.g., 'site-\\d+/(\\w+)/.*')."
116+
"description": "Optional. A Java-compatible regex pattern to apply to the intermediate string generated by 'sourceTemplate'."
117117
},
118-
"groupIndex": {
119-
"type": "integer",
120-
"description": "The 1-based index of the capture group to extract.",
121-
"format": "int32"
118+
"replacement": {
119+
"type": "string",
120+
"description": "Optional. The replacement string using regex capture group references ($1, $2, etc.). Only used when 'pattern' is provided. Examples: '$1-$2' to join groups with a dash, '$2-group-$1' to reorder. If 'pattern' is specified without 'replacement', defaults to '$1' (extracts first capture group)."
122121
},
123122
"onMatchFailure": {
124123
"type": "string",
125-
"description": "Action to take if the regex does not match the source value. Default SET_NULL.",
124+
"description": "Action if 'pattern' is provided but does not match. SET_NULL sets the value to NULL. SET_UNDEFINED removes the key from the JSON object. SKIP_UPDATE leaves the existing value unchanged.",
126125
"enum": [
127126
"SET_NULL",
127+
"SET_UNDEFINED",
128128
"SKIP_UPDATE"
129129
]
130+
},
131+
"onMissingValue": {
132+
"type": "string",
133+
"description": "Action if a column referenced in 'sourceTemplate' is null/undefined. SET_NULL sets the value to NULL. SET_UNDEFINED removes the key from the JSON object. SKIP_UPDATE leaves the existing value unchanged. USE_EMPTY_STRING replaces the missing value with '' (empty string) when interpolating the template.",
134+
"enum": [
135+
"SET_NULL",
136+
"SET_UNDEFINED",
137+
"SKIP_UPDATE",
138+
"USE_EMPTY_STRING"
139+
]
130140
}
131141
},
132-
"description": "Sets the column's value by extracting a capture group from a regex pattern applied to a source column.",
142+
"description": "Sets a column's value by composing a string from a source template, then optionally applying a regex-replace to that intermediate string.",
133143
"required": ["concreteType"]
134144
},
135145
"org.sagebionetworks.repo.model.grid.update.GridUpdateRequest": {
@@ -651,8 +661,8 @@
651661
},
652662
"description": "",
653663
"oneOf": [
654-
{"$ref": "#/components/schemas/org.sagebionetworks.repo.model.grid.update.RegexExtractSetValue"},
655-
{"$ref": "#/components/schemas/org.sagebionetworks.repo.model.grid.update.LiteralSetValue"}
664+
{"$ref": "#/components/schemas/org.sagebionetworks.repo.model.grid.update.LiteralSetValue"},
665+
{"$ref": "#/components/schemas/org.sagebionetworks.repo.model.grid.update.TemplateSetValue"}
656666
],
657667
"required": ["concreteType"],
658668
"discriminator": {"propertyName": "concreteType"}

0 commit comments

Comments
 (0)