Skip to content

Commit 636583e

Browse files
committed
refactor: use existing subfolder IDs instead of auto-creating folders
- Update projects.yml to store subfolder IDs (e.g., WES_Level_1: syn69630475 for HTAN2_Ovarian)
- Modify binding script to use existing subfolders instead of creating new ones
- Remove folder creation functions (no longer needed)
- Update workflow description to reflect the new approach
1 parent be6de46 commit 636583e

File tree

3 files changed

+53
-49
lines changed

3 files changed

+53
-49
lines changed

.github/workflows/json-schema-synapse.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -134,20 +134,20 @@ jobs:
134134
fi
135135
done
136136
137-
- name: Bind file-based schemas to project folders
137+
- name: Bind file-based schemas to existing project subfolders
138138
env:
139139
SYNAPSE_USERNAME: ${{ secrets.SYNAPSE_USERNAME }}
140140
SYNAPSE_AUTH_TOKEN: ${{ secrets.SYNAPSE_AUTH_TOKEN }}
141141
run: |
142-
# For now, test with just one project (HTAN2_Ovarian) and WES Level 1 schema
143-
# This will create a WES_Level_1 subfolder and bind the schema with a fileview
142+
# Test with HTAN2_Ovarian and existing WES_Level_1 subfolder (syn69630475)
143+
# This will bind the schema to the existing subfolder and create a fileview
144144
145145
# Find WES Level 1 schema
146146
WES_SCHEMA=$(find JSON_Schemas -name "*WES*Level*1*" -o -name "*level_1*" | head -1)
147147
148148
if [ -n "$WES_SCHEMA" ]; then
149149
echo "Found WES Level 1 schema: $WES_SCHEMA"
150-
echo "Binding to HTAN2_Ovarian project..."
150+
echo "Binding to existing WES_Level_1 subfolder in HTAN2_Ovarian project..."
151151
152152
poetry run python scripts/bind_file_based_schemas.py \
153153
--schema-file "$WES_SCHEMA" \
@@ -157,7 +157,7 @@ jobs:
157157
echo "No WES Level 1 schema found, skipping file-based binding"
158158
fi
159159
160-
# TODO: Later expand to bind all file-based schemas to all projects
160+
# TODO: Later expand to bind all file-based schemas to all projects with existing subfolders
161161
# for project in HTAN2_Ovarian HTAN2_Glioma HTAN2_Gastric; do
162162
# poetry run python scripts/bind_file_based_schemas.py \
163163
# --schema-file "$WES_SCHEMA" \

config/projects.yml

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,37 @@
1-
# HTAN2 Project Synapse IDs
1+
# HTAN2 Project Synapse IDs and subfolder mappings
22
# Used for binding JSON schemas to the correct project folders
33
projects:
4-
HTAN2_Ovarian: syn63298044
5-
HTAN2_Glioma: syn63298048
6-
HTAN2_Gastric: syn63298051
7-
HTAN2_Skin: syn63298054
8-
HTAN2_Pediatric: syn63298059
9-
HTAN2_Myeloma: syn63298063
10-
HTAN2_Pancreatic: syn63298065
11-
HTAN2_Prostate: syn63298068
12-
HTAN2_CRC: syn63298073
13-
HTAN2_Lymphoma: syn63298076
4+
HTAN2_Ovarian:
5+
project_id: syn63298044
6+
subfolders:
7+
WES_Level_1: syn69630475
8+
HTAN2_Glioma:
9+
project_id: syn63298048
10+
subfolders: {}
11+
HTAN2_Gastric:
12+
project_id: syn63298051
13+
subfolders: {}
14+
HTAN2_Skin:
15+
project_id: syn63298054
16+
subfolders: {}
17+
HTAN2_Pediatric:
18+
project_id: syn63298059
19+
subfolders: {}
20+
HTAN2_Myeloma:
21+
project_id: syn63298063
22+
subfolders: {}
23+
HTAN2_Pancreatic:
24+
project_id: syn63298065
25+
subfolders: {}
26+
HTAN2_Prostate:
27+
project_id: syn63298068
28+
subfolders: {}
29+
HTAN2_CRC:
30+
project_id: syn63298073
31+
subfolders: {}
32+
HTAN2_Lymphoma:
33+
project_id: syn63298076
34+
subfolders: {}
1435

1536
# Schema type classification
1637
schema_types:

scripts/bind_file_based_schemas.py

Lines changed: 16 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -119,30 +119,7 @@ def _create_columns_from_json_schema(json_schema: Dict[str, Any]) -> List[Column
119119
return columns
120120

121121

122-
def create_or_get_folder(syn: Synapse, parent_id: str, folder_name: str) -> str:
123-
"""Create a folder if it doesn't exist, or get the existing folder ID."""
124-
try:
125-
# Try to get existing folder
126-
folder = syn.get(parent_id)
127-
children = syn.getChildren(folder)
128-
129-
for child in children:
130-
if child['name'] == folder_name and child['type'] == 'org.sagebionetworks.repo.model.Folder':
131-
print(f"Found existing folder: {folder_name} (ID: {child['id']})")
132-
return child['id']
133-
134-
# Create new folder if not found
135-
print(f"Creating new folder: {folder_name}")
136-
folder = syn.store({
137-
'name': folder_name,
138-
'parentId': parent_id,
139-
'concreteType': 'org.sagebionetworks.repo.model.Folder'
140-
})
141-
return folder['id']
142-
143-
except Exception as e:
144-
print(f"Error creating/getting folder {folder_name}: {e}")
145-
raise
122+
146123

147124

148125
def create_fileview_from_schema(syn: Synapse, schema_file: str, folder_id: str, view_name: str) -> str:
@@ -189,7 +166,7 @@ def main():
189166
parser = argparse.ArgumentParser(description='Bind file-based schemas to Synapse project folders')
190167
parser.add_argument('--schema-file', required=True, help='Path to the JSON schema file')
191168
parser.add_argument('--project-name', required=True, help='Name of the project (e.g., HTAN2_Ovarian)')
192-
parser.add_argument('--subfolder-name', default='WES_Level_1', help='Name of the subfolder to create (default: WES_Level_1)')
169+
parser.add_argument('--subfolder-name', default='WES_Level_1', help='Name of the subfolder to use (default: WES_Level_1)')
193170

194171
args = parser.parse_args()
195172

@@ -202,14 +179,23 @@ def main():
202179
print(f"Error: Config file not found at {config_path}")
203180
sys.exit(1)
204181

205-
# Get project ID
206-
project_id = config['projects'].get(args.project_name)
207-
if not project_id:
182+
# Get project info
183+
project_info = config['projects'].get(args.project_name)
184+
if not project_info:
208185
print(f"Error: Project '{args.project_name}' not found in config")
209186
print(f"Available projects: {list(config['projects'].keys())}")
210187
sys.exit(1)
211188

189+
project_id = project_info['project_id']
190+
subfolder_id = project_info['subfolders'].get(args.subfolder_name)
191+
192+
if not subfolder_id:
193+
print(f"Error: Subfolder '{args.subfolder_name}' not found for project '{args.project_name}'")
194+
print(f"Available subfolders: {list(project_info['subfolders'].keys())}")
195+
sys.exit(1)
196+
212197
print(f"Binding schema to project: {args.project_name} (ID: {project_id})")
198+
print(f"Using existing subfolder: {args.subfolder_name} (ID: {subfolder_id})")
213199

214200
# Initialize Synapse client
215201
try:
@@ -220,15 +206,12 @@ def main():
220206
sys.exit(1)
221207

222208
try:
223-
# Create or get the subfolder
224-
subfolder_id = create_or_get_folder(syn, project_id, args.subfolder_name)
225-
226-
# Create fileview from the schema
209+
# Create fileview from the schema in the existing subfolder
227210
schema_name = os.path.splitext(os.path.basename(args.schema_file))[0]
228211
view_name = f"{args.subfolder_name}_FileView"
229212
fileview_id = create_fileview_from_schema(syn, args.schema_file, subfolder_id, view_name)
230213

231-
print(f"Successfully bound schema to folder: {subfolder_id}")
214+
print(f"Successfully bound schema to existing folder: {subfolder_id}")
232215
print(f"Created fileview: {fileview_id}")
233216

234217
except Exception as e:

0 commit comments

Comments
 (0)