|
| 1 | +"""Here is where you'll find the code for the VirtualTable tutorial.""" |
| 2 | + |
| 3 | +import pandas as pd |
| 4 | + |
| 5 | +from synapseclient import Synapse |
| 6 | +from synapseclient.models import Column, ColumnType, Project, Table, VirtualTable |
| 7 | + |
# Create the Synapse client and log in
syn = Synapse()
syn.login()

# Look up, by name, the project that will hold the tables and virtual tables
project = Project(
    name="My uniquely named project about Alzheimer's Disease",
).get()
project_id = project.id
print(f"Got project with ID: {project_id}")
| 16 | + |
# Schema of the first table: one (column name, column type) pair per column
table1_columns = [
    Column(name=column_name, column_type=column_type)
    for column_name, column_type in (
        ("sample_id", ColumnType.STRING),
        ("patient_id", ColumnType.STRING),
        ("age", ColumnType.INTEGER),
        ("diagnosis", ColumnType.STRING),
    )
]

# Store the (still empty) first table in the project
table1 = Table(
    name="Patient Demographics",
    parent_id=project_id,
    columns=table1_columns,
).store()
print(f"Created table 1 with ID: {table1.id}")

# Rows for the first table, one tuple per patient sample
data1 = pd.DataFrame(
    data=[
        ("S1", "P1", 70, "Alzheimer's"),
        ("S2", "P2", 65, "Healthy"),
        ("S3", "P3", 72, "Alzheimer's"),
        ("S4", "P4", 68, "Healthy"),
        ("S5", "P5", 75, "Alzheimer's"),
        ("S6", "P6", 80, "Healthy"),
    ],
    columns=["sample_id", "patient_id", "age", "diagnosis"],
)
# Upsert the rows, matching on sample_id as the primary key
table1.upsert_rows(values=data1, primary_keys=["sample_id"])
| 45 | + |
# Schema of the second table: one (column name, column type) pair per column
table2_columns = [
    Column(name=column_name, column_type=column_type)
    for column_name, column_type in (
        ("sample_id", ColumnType.STRING),
        ("gene", ColumnType.STRING),
        ("expression_level", ColumnType.DOUBLE),
    )
]

# Store the (still empty) second table in the project
table2 = Table(
    name="Gene Expression Data",
    parent_id=project_id,
    columns=table2_columns,
).store()
print(f"Created table 2 with ID: {table2.id}")

# Rows for the second table, one tuple per gene expression measurement
data2 = pd.DataFrame(
    data=[
        ("S1", "APOE", 2.5),
        ("S2", "APP", 1.8),
        ("S3", "PSEN1", 3.2),
        ("S4", "MAPT", 2.1),
        ("S5", "APP", 3.5),
        ("S7", "PSEN2", 1.9),
    ],
    columns=["sample_id", "gene", "expression_level"],
)
# Upsert the rows, matching on sample_id as the primary key
table2.upsert_rows(values=data2, primary_keys=["sample_id"])
# Note: VirtualTables do not support JOIN or UNION operations in the defining_sql.
# To combine data from multiple tables, consider using a MaterializedView instead.
| 75 | + |
| 76 | + |
def create_basic_virtual_table():
    """
    Example: Create a basic virtual table with a simple SELECT query.

    Stores a virtual table defined as ``SELECT *`` over the first table,
    queries the new virtual table, and prints the resulting DataFrame.
    """
    stored_view = VirtualTable(
        name="Patient Data View",
        description="A virtual table showing patient demographics",
        parent_id=project_id,
        defining_sql=f"SELECT * FROM {table1.id}",
    ).store()
    print(f"Created Virtual Table with ID: {stored_view.id}")

    # Read everything back through the virtual table itself
    rows: pd.DataFrame = stored_view.query(
        query=f"SELECT * FROM {stored_view.id}",
        include_row_id_and_row_version=False,
    )

    # Show the query output on the console
    print("Results from the basic virtual table:")
    print(rows)
| 100 | + |
| 101 | + |
def create_virtual_table_with_column_selection():
    """
    Example: Create a virtual table that selects only specific columns.

    Stores a virtual table defined over just the ``patient_id`` and ``age``
    columns of the first table, queries it, and prints the resulting
    DataFrame.
    """
    stored_view = VirtualTable(
        name="Patient Age View",
        description="A virtual table showing only patient IDs and ages",
        parent_id=project_id,
        defining_sql=f"SELECT patient_id, age FROM {table1.id}",
    ).store()
    print(f"Created Virtual Table with ID: {stored_view.id}")

    # Read everything back through the virtual table itself
    rows: pd.DataFrame = stored_view.query(
        query=f"SELECT * FROM {stored_view.id}",
        include_row_id_and_row_version=False,
    )

    # Show the query output on the console
    print("Results from the virtual table with column selection:")
    print(rows)
| 125 | + |
| 126 | + |
def create_virtual_table_with_filtering():
    """
    Example: Create a virtual table with a WHERE clause for filtering.

    Stores a virtual table restricted to rows of the first table whose
    diagnosis is "Alzheimer's", queries it, and prints the resulting
    DataFrame.
    """
    stored_view = VirtualTable(
        name="Alzheimer's Patients",
        description="A virtual table showing only patients with Alzheimer's",
        parent_id=project_id,
        # The doubled single quote ('') is the SQL escape for the apostrophe
        defining_sql=f"SELECT * FROM {table1.id} WHERE diagnosis = 'Alzheimer''s'",
    ).store()
    print(f"Created Virtual Table with ID: {stored_view.id}")

    # Read everything back through the virtual table itself
    rows: pd.DataFrame = stored_view.query(
        query=f"SELECT * FROM {stored_view.id}",
        include_row_id_and_row_version=False,
    )

    # Show the query output on the console
    print("Results from the virtual table with filtering:")
    print(rows)
| 150 | + |
| 151 | + |
def create_virtual_table_with_ordering():
    """
    Example: Create a virtual table with an ORDER BY clause.

    Stores a virtual table defined over the first table with
    ``ORDER BY age DESC``, queries it, and prints the resulting DataFrame.
    """
    stored_view = VirtualTable(
        name="Patients by Age",
        description="A virtual table showing patients ordered by age",
        parent_id=project_id,
        defining_sql=f"SELECT * FROM {table1.id} ORDER BY age DESC",
    ).store()
    print(f"Created Virtual Table with ID: {stored_view.id}")

    # Read everything back through the virtual table itself
    rows: pd.DataFrame = stored_view.query(
        query=f"SELECT * FROM {stored_view.id}",
        include_row_id_and_row_version=False,
    )

    # Show the query output on the console
    print("Results from the virtual table with ordering:")
    print(rows)
| 175 | + |
| 176 | + |
def create_virtual_table_with_aggregation():
    """
    Example: Create a virtual table with an aggregate function.

    Stores a virtual table that groups the first table by diagnosis and
    counts the rows in each group as ``patient_count``, queries it, and
    prints the resulting DataFrame.
    """
    stored_view = VirtualTable(
        name="Diagnosis Count",
        description="A virtual table showing the count of patients by diagnosis",
        parent_id=project_id,
        defining_sql=(
            "SELECT diagnosis, COUNT(*) AS patient_count "
            f"FROM {table1.id} GROUP BY diagnosis"
        ),
    ).store()
    print(f"Created Virtual Table with ID: {stored_view.id}")

    # Read everything back through the virtual table itself
    rows: pd.DataFrame = stored_view.query(
        query=f"SELECT * FROM {stored_view.id}",
        include_row_id_and_row_version=False,
    )

    # Show the query output on the console
    print("Results from the virtual table with aggregation:")
    print(rows)
| 200 | + |
| 201 | + |
def main():
    """Run every virtual table example, in the order they are defined."""
    examples = (
        create_basic_virtual_table,
        create_virtual_table_with_column_selection,
        create_virtual_table_with_filtering,
        create_virtual_table_with_ordering,
        create_virtual_table_with_aggregation,
    )
    for run_example in examples:
        run_example()


if __name__ == "__main__":
    main()
0 commit comments