Skip to content

Commit fdd3d74

Browse files
authored
[SYNPY-1580] Adds VirtualTable OOP Model (#1195)
* adds virtualtable model * adds tests * adds docs + tutorial * pre-commit * removes spaces * fix tutorial code lines * updates docstrings * fixes unit tests * updates flaky integration tests * adds missing unit test * adds special handling for JOIN and UNION * move import * removes custom exception * sonar fixes * try upgrade pip * .
1 parent 9fdc692 commit fdd3d74

File tree

14 files changed

+2420
-8
lines changed

14 files changed

+2420
-8
lines changed

.github/workflows/build.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,8 @@ jobs:
8989
if: steps.cache-dependencies.outputs.cache-hit != 'true'
9090
shell: bash
9191
run: |
92+
python -m pip install --upgrade pip
93+
9294
pip install -e ".[boto3,pandas,pysftp,tests]"
9395
9496
# ensure that numpy c extensions are installed on windows
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# VirtualTable
2+
3+
Contained within this file are experimental interfaces for working with the Synapse Python
4+
Client. Unless otherwise noted, these interfaces are subject to change at any time. Use
5+
at your own risk.
6+
7+
## API reference
8+
9+
::: synapseclient.models.VirtualTable
10+
options:
11+
inherited_members: true
12+
members:
13+
- store_async
14+
- get_async
15+
- delete_async
16+
- query_async
17+
- query_part_mask_async
18+
- get_permissions
19+
- get_acl
20+
- set_permissions
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# VirtualTable
2+
3+
Contained within this file are experimental interfaces for working with the Synapse Python
4+
Client. Unless otherwise noted, these interfaces are subject to change at any time. Use
5+
at your own risk.
6+
7+
## API reference
8+
9+
::: synapseclient.models.VirtualTable
10+
options:
11+
inherited_members: true
12+
members:
13+
- store
14+
- get
15+
- delete
16+
- query
17+
- query_part_mask
18+
- get_permissions
19+
- get_acl
20+
- set_permissions
Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
"""Here is where you'll find the code for the VirtualTable tutorial."""
2+
3+
import pandas as pd
4+
5+
from synapseclient import Synapse
6+
from synapseclient.models import Column, ColumnType, Project, Table, VirtualTable
7+
8+
# Create a Synapse client and log in; the examples below rely on this session.
syn = Synapse()
syn.login()

# Look up, by name, the project that will hold the tables and virtual tables.
project = Project(
    name="My uniquely named project about Alzheimer's Disease"
).get()
project_id = project.id
print(f"Got project with ID: {project_id}")
16+
17+
# Schema of the first source table: one Column per (name, type) pair.
table1_columns = [
    Column(name=column_name, column_type=column_type)
    for column_name, column_type in (
        ("sample_id", ColumnType.STRING),
        ("patient_id", ColumnType.STRING),
        ("age", ColumnType.INTEGER),
        ("diagnosis", ColumnType.STRING),
    )
]

# Build the table under the project and store it in a single expression.
table1 = Table(
    name="Patient Demographics",
    parent_id=project_id,
    columns=table1_columns,
).store()
print(f"Created table 1 with ID: {table1.id}")

# Assemble the demographic rows as one dict per record, then upsert them
# using sample_id as the primary key.
data1 = pd.DataFrame(
    [
        dict(zip(("sample_id", "patient_id", "age", "diagnosis"), record))
        for record in (
            ("S1", "P1", 70, "Alzheimer's"),
            ("S2", "P2", 65, "Healthy"),
            ("S3", "P3", 72, "Alzheimer's"),
            ("S4", "P4", 68, "Healthy"),
            ("S5", "P5", 75, "Alzheimer's"),
            ("S6", "P6", 80, "Healthy"),
        )
    ]
)
table1.upsert_rows(values=data1, primary_keys=["sample_id"])
45+
46+
# Schema of the second source table: one Column per (name, type) pair.
table2_columns = [
    Column(name=column_name, column_type=column_type)
    for column_name, column_type in (
        ("sample_id", ColumnType.STRING),
        ("gene", ColumnType.STRING),
        ("expression_level", ColumnType.DOUBLE),
    )
]

# Build the table under the project and store it in a single expression.
table2 = Table(
    name="Gene Expression Data",
    parent_id=project_id,
    columns=table2_columns,
).store()
print(f"Created table 2 with ID: {table2.id}")

# Assemble the gene expression rows as one dict per record, then upsert them
# using sample_id as the primary key.
data2 = pd.DataFrame(
    [
        dict(zip(("sample_id", "gene", "expression_level"), record))
        for record in (
            ("S1", "APOE", 2.5),
            ("S2", "APP", 1.8),
            ("S3", "PSEN1", 3.2),
            ("S4", "MAPT", 2.1),
            ("S5", "APP", 3.5),
            ("S7", "PSEN2", 1.9),
        )
    ]
)
table2.upsert_rows(values=data2, primary_keys=["sample_id"])
73+
# Note: VirtualTables do not support JOIN or UNION operations in the defining_sql.
74+
# If you need to combine data from multiple tables, consider using a MaterializedView instead.
75+
76+
77+
def create_basic_virtual_table():
    """
    Example: Store a virtual table defined by a plain ``SELECT *`` over the
    first table, then query it and print the result.
    """
    stored_view = VirtualTable(
        name="Patient Data View",
        description="A virtual table showing patient demographics",
        parent_id=project_id,
        defining_sql=f"SELECT * FROM {table1.id}",
    ).store()
    print(f"Created Virtual Table with ID: {stored_view.id}")

    # Query the stored virtual table by its own ID, leaving out the row
    # ID / row version columns.
    rows: pd.DataFrame = stored_view.query(
        query=f"SELECT * FROM {stored_view.id}",
        include_row_id_and_row_version=False,
    )

    print("Results from the basic virtual table:")
    print(rows)
100+
101+
102+
def create_virtual_table_with_column_selection():
    """
    Example: Store a virtual table whose defining SQL selects only the
    ``patient_id`` and ``age`` columns, then query it and print the result.
    """
    stored_view = VirtualTable(
        name="Patient Age View",
        description="A virtual table showing only patient IDs and ages",
        parent_id=project_id,
        defining_sql=f"SELECT patient_id, age FROM {table1.id}",
    ).store()
    print(f"Created Virtual Table with ID: {stored_view.id}")

    # Query the stored virtual table by its own ID, leaving out the row
    # ID / row version columns.
    rows: pd.DataFrame = stored_view.query(
        query=f"SELECT * FROM {stored_view.id}",
        include_row_id_and_row_version=False,
    )

    print("Results from the virtual table with column selection:")
    print(rows)
125+
126+
127+
def create_virtual_table_with_filtering():
    """
    Example: Store a virtual table whose defining SQL filters rows with a
    WHERE clause on ``diagnosis``, then query it and print the result.
    """
    stored_view = VirtualTable(
        name="Alzheimer's Patients",
        description="A virtual table showing only patients with Alzheimer's",
        parent_id=project_id,
        # The doubled single quote is the SQL escape for the apostrophe.
        defining_sql=f"SELECT * FROM {table1.id} WHERE diagnosis = 'Alzheimer''s'",
    ).store()
    print(f"Created Virtual Table with ID: {stored_view.id}")

    # Query the stored virtual table by its own ID, leaving out the row
    # ID / row version columns.
    rows: pd.DataFrame = stored_view.query(
        query=f"SELECT * FROM {stored_view.id}",
        include_row_id_and_row_version=False,
    )

    print("Results from the virtual table with filtering:")
    print(rows)
150+
151+
152+
def create_virtual_table_with_ordering():
    """
    Example: Store a virtual table whose defining SQL orders rows by ``age``
    descending, then query it and print the result.
    """
    stored_view = VirtualTable(
        name="Patients by Age",
        description="A virtual table showing patients ordered by age",
        parent_id=project_id,
        defining_sql=f"SELECT * FROM {table1.id} ORDER BY age DESC",
    ).store()
    print(f"Created Virtual Table with ID: {stored_view.id}")

    # Query the stored virtual table by its own ID, leaving out the row
    # ID / row version columns.
    rows: pd.DataFrame = stored_view.query(
        query=f"SELECT * FROM {stored_view.id}",
        include_row_id_and_row_version=False,
    )

    print("Results from the virtual table with ordering:")
    print(rows)
175+
176+
177+
def create_virtual_table_with_aggregation():
    """
    Example: Store a virtual table whose defining SQL counts patients per
    diagnosis with ``COUNT(*)`` and ``GROUP BY``, then query it and print
    the result.
    """
    stored_view = VirtualTable(
        name="Diagnosis Count",
        description="A virtual table showing the count of patients by diagnosis",
        parent_id=project_id,
        defining_sql=f"SELECT diagnosis, COUNT(*) AS patient_count FROM {table1.id} GROUP BY diagnosis",
    ).store()
    print(f"Created Virtual Table with ID: {stored_view.id}")

    # Query the stored virtual table by its own ID, leaving out the row
    # ID / row version columns.
    rows: pd.DataFrame = stored_view.query(
        query=f"SELECT * FROM {stored_view.id}",
        include_row_id_and_row_version=False,
    )

    print("Results from the virtual table with aggregation:")
    print(rows)
200+
201+
202+
def main():
    """Run each virtual table example once, in tutorial order."""
    examples = (
        create_basic_virtual_table,
        create_virtual_table_with_column_selection,
        create_virtual_table_with_filtering,
        create_virtual_table_with_ordering,
        create_virtual_table_with_aggregation,
    )
    for run_example in examples:
        run_example()


# Only run the examples when this file is executed as a script.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)