Skip to content

Commit 8020ab5

Browse files
add multi-schema designs
1 parent 9b611db commit 8020ab5

File tree

3 files changed

+122
-52
lines changed

3 files changed

+122
-52
lines changed

book/30-schema-design/030-foreign-keys.ipynb

Lines changed: 30 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,28 @@
3333
},
3434
{
3535
"cell_type": "code",
36-
"execution_count": 7,
36+
"execution_count": 1,
3737
"metadata": {},
38-
"outputs": [],
38+
"outputs": [
39+
{
40+
"name": "stdout",
41+
"output_type": "stream",
42+
"text": [
43+
"Exception reporting mode: Minimal\n"
44+
]
45+
},
46+
{
47+
"name": "stderr",
48+
"output_type": "stream",
49+
"text": [
50+
"[2024-10-21 04:07:03,488][INFO]: Connecting root@localhost:3306\n",
51+
"[2024-10-21 04:07:03,506][INFO]: Connected root@localhost:3306\n"
52+
]
53+
}
54+
],
3955
"source": [
56+
"%xmode Minimal\n",
57+
"\n",
4058
"import datajoint as dj\n",
4159
"schema = dj.Schema('company')\n",
4260
"\n",
@@ -83,7 +101,7 @@
83101
},
84102
{
85103
"cell_type": "code",
86-
"execution_count": 8,
104+
"execution_count": 2,
87105
"metadata": {},
88106
"outputs": [
89107
{
@@ -119,10 +137,10 @@
119137
"</svg>"
120138
],
121139
"text/plain": [
122-
"<datajoint.diagram.Diagram at 0x7f7092233b50>"
140+
"<datajoint.diagram.Diagram at 0x7fe136962e50>"
123141
]
124142
},
125-
"execution_count": 8,
143+
"execution_count": 2,
126144
"metadata": {},
127145
"output_type": "execute_result"
128146
}
@@ -156,7 +174,7 @@
156174
},
157175
{
158176
"cell_type": "code",
159-
"execution_count": 6,
177+
"execution_count": 3,
160178
"metadata": {},
161179
"outputs": [
162180
{
@@ -243,7 +261,7 @@
243261
" (Total: 0)"
244262
]
245263
},
246-
"execution_count": 6,
264+
"execution_count": 3,
247265
"metadata": {},
248266
"output_type": "execute_result"
249267
}
@@ -265,7 +283,7 @@
265283
},
266284
{
267285
"cell_type": "code",
268-
"execution_count": 11,
286+
"execution_count": 4,
269287
"metadata": {},
270288
"outputs": [],
271289
"source": [
@@ -275,22 +293,15 @@
275293
},
276294
{
277295
"cell_type": "code",
278-
"execution_count": 12,
296+
"execution_count": 5,
279297
"metadata": {},
280298
"outputs": [
281299
{
282300
"ename": "IntegrityError",
283301
"evalue": "Cannot add or update a child row: a foreign key constraint fails (`company`.`employee`, CONSTRAINT `employee_ibfk_1` FOREIGN KEY (`title_code`) REFERENCES `#title` (`title_code`) ON DELETE RESTRICT ON UPDATE CASCADE)",
284302
"output_type": "error",
285303
"traceback": [
286-
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
287-
"\u001b[0;31mIntegrityError\u001b[0m Traceback (most recent call last)",
288-
"Cell \u001b[0;32mIn[12], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# This fails\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[43mEmployee\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minsert1\u001b[49m\u001b[43m(\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mBrenda\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mMeans\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mBizDev\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n",
289-
"File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/datajoint/table.py:347\u001b[0m, in \u001b[0;36mTable.insert1\u001b[0;34m(self, row, **kwargs)\u001b[0m\n\u001b[1;32m 340\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minsert1\u001b[39m(\u001b[38;5;28mself\u001b[39m, row, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 341\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 342\u001b[0m \u001b[38;5;124;03m Insert one data record into the table. For ``kwargs``, see ``insert()``.\u001b[39;00m\n\u001b[1;32m 343\u001b[0m \n\u001b[1;32m 344\u001b[0m \u001b[38;5;124;03m :param row: a numpy record, a dict-like object, or an ordered sequence to be inserted\u001b[39;00m\n\u001b[1;32m 345\u001b[0m \u001b[38;5;124;03m as one row.\u001b[39;00m\n\u001b[1;32m 346\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 347\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minsert\u001b[49m\u001b[43m(\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrow\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
290-
"File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/datajoint/table.py:451\u001b[0m, in \u001b[0;36mTable.insert\u001b[0;34m(self, rows, replace, skip_duplicates, ignore_extra_fields, allow_direct_insert)\u001b[0m\n\u001b[1;32m 435\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 436\u001b[0m query \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{command}\u001b[39;00m\u001b[38;5;124m INTO \u001b[39m\u001b[38;5;132;01m{destination}\u001b[39;00m\u001b[38;5;124m(`\u001b[39m\u001b[38;5;132;01m{fields}\u001b[39;00m\u001b[38;5;124m`) VALUES \u001b[39m\u001b[38;5;132;01m{placeholders}\u001b[39;00m\u001b[38;5;132;01m{duplicate}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(\n\u001b[1;32m 437\u001b[0m command\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mREPLACE\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m replace \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mINSERT\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 438\u001b[0m destination\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfrom_clause(),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 449\u001b[0m ),\n\u001b[1;32m 450\u001b[0m )\n\u001b[0;32m--> 451\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconnection\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mquery\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 452\u001b[0m \u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 453\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 454\u001b[0m \u001b[43m \u001b[49m\u001b[43mitertools\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchain\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_iterable\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 
455\u001b[0m \u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[43mv\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mr\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mvalues\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mr\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mrows\u001b[49m\n\u001b[1;32m 456\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 457\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 458\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 459\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m UnknownAttributeError \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[1;32m 460\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m err\u001b[38;5;241m.\u001b[39msuggest(\n\u001b[1;32m 461\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTo ignore extra fields in insert, set ignore_extra_fields=True\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 462\u001b[0m )\n",
291-
"File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/datajoint/connection.py:343\u001b[0m, in \u001b[0;36mConnection.query\u001b[0;34m(self, query, args, as_dict, suppress_warnings, reconnect)\u001b[0m\n\u001b[1;32m 341\u001b[0m cursor \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_conn\u001b[38;5;241m.\u001b[39mcursor(cursor\u001b[38;5;241m=\u001b[39mcursor_class)\n\u001b[1;32m 342\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 343\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_execute_query\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcursor\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msuppress_warnings\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m errors\u001b[38;5;241m.\u001b[39mLostConnectionError:\n\u001b[1;32m 345\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m reconnect:\n",
292-
"File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/datajoint/connection.py:299\u001b[0m, in \u001b[0;36mConnection._execute_query\u001b[0;34m(cursor, query, args, suppress_warnings)\u001b[0m\n\u001b[1;32m 297\u001b[0m cursor\u001b[38;5;241m.\u001b[39mexecute(query, args)\n\u001b[1;32m 298\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m client\u001b[38;5;241m.\u001b[39merr\u001b[38;5;241m.\u001b[39mError \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[0;32m--> 299\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m translate_query_error(err, query)\n",
293-
"\u001b[0;31mIntegrityError\u001b[0m: Cannot add or update a child row: a foreign key constraint fails (`company`.`employee`, CONSTRAINT `employee_ibfk_1` FOREIGN KEY (`title_code`) REFERENCES `#title` (`title_code`) ON DELETE RESTRICT ON UPDATE CASCADE)"
304+
"\u001b[0;31mIntegrityError\u001b[0m\u001b[0;31m:\u001b[0m Cannot add or update a child row: a foreign key constraint fails (`company`.`employee`, CONSTRAINT `employee_ibfk_1` FOREIGN KEY (`title_code`) REFERENCES `#title` (`title_code`) ON DELETE RESTRICT ON UPDATE CASCADE)\n"
294305
]
295306
}
296307
],
@@ -314,29 +325,9 @@
314325
},
315326
{
316327
"cell_type": "code",
317-
"execution_count": 13,
328+
"execution_count": null,
318329
"metadata": {},
319-
"outputs": [
320-
{
321-
"name": "stderr",
322-
"output_type": "stream",
323-
"text": [
324-
"[2024-10-21 02:56:58,805][INFO]: Deleting 1 rows from `company`.`employee`\n",
325-
"[2024-10-21 02:56:58,818][INFO]: Deleting 7 rows from `company`.`#title`\n",
326-
"[2024-10-21 02:57:03,935][WARNING]: Deletes cancelled\n"
327-
]
328-
},
329-
{
330-
"data": {
331-
"text/plain": [
332-
"7"
333-
]
334-
},
335-
"execution_count": 13,
336-
"metadata": {},
337-
"output_type": "execute_result"
338-
}
339-
],
330+
"outputs": [],
340331
"source": [
341332
"Title.delete()"
342333
]
@@ -375,13 +366,6 @@
375366
"4. **Update Restriction**: Updates to the primary key in the parent table and foreign key in the child table are restricted to maintain consistency, unless cascading updates are explicitly allowed.\n",
376367
"5. **Performance Optimization**: An index on the foreign key in the child table ensures fast searches and efficient joins. \n"
377368
]
378-
},
379-
{
380-
"cell_type": "code",
381-
"execution_count": null,
382-
"metadata": {},
383-
"outputs": [],
384-
"source": []
385369
}
386370
],
387371
"metadata": {

book/30-schema-design/032-schema-modules.ipynb

Lines changed: 79 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,7 @@
44
"cell_type": "markdown",
55
"metadata": {},
66
"source": [
7-
"# Schema Modules\n",
8-
"\n",
9-
"\n"
7+
"# Multi-Schema Designs"
108
]
119
},
1210
{
@@ -17,13 +15,88 @@
1715
}
1816
},
1917
"source": [
20-
"A large database schema can be composed of multiple modules. We often call each module a schema "
18+
"# Defining Complex Databases with Multiple Schemas in DataJoint\n",
19+
"\n",
20+
"In DataJoint, defining **multiple schemas across separate Python modules** ensures that large, complex projects remain well-organized, modular, and maintainable. Each schema should be defined in a **dedicated Python module** to adhere to best practices. This structure ensures that every module maintains **only one `schema` object**, and **downstream schemas import upstream schemas** to manage dependencies correctly. This approach improves code clarity, enables better version control, and simplifies collaboration across teams.\n",
21+
"\n",
22+
"\n",
23+
"## 1. Why Use Multiple Schemas in Separate Modules?\n",
24+
"\n",
25+
"Using multiple schemas across separate modules offers the following benefits:\n",
26+
"\n",
27+
"1. **Modularity and Code Organization**: Each module contains only the tables relevant to a specific schema, making the codebase easier to manage and navigate.\n",
28+
"2. **Clear Boundaries Between Schemas**: Ensures a separation of concerns, where each schema focuses on a specific aspect of the pipeline (e.g., acquisition, processing, analysis).\n",
29+
"3. **Dependency Management**: Downstream schemas explicitly **import upstream schemas** to manage table dependencies and data flow.\n",
30+
"4. **Collaboration**: Multiple developers or teams can work on separate modules without conflicts.\n",
31+
"5. **Scalability and Maintainability**: Isolating schemas into modules simplifies future updates and troubleshooting.\n",
32+
"\n",
33+
"\n",
34+
"## 2. How to Structure Modules for Multiple Schemas\n",
35+
"\n",
36+
"Below is an example that demonstrates how to organize multiple schemas in separate Python modules.\n",
37+
"\n",
38+
"### Project Structure\n",
39+
"\n",
40+
"```\n",
41+
"my_pipeline/\n",
42+
"│\n",
43+
"├── subject.py # Defines subject_management schema\n",
44+
"├── acquisition.py # Defines acquisition schema (depends on subject_management)\n",
45+
"├── processing.py # Defines processing schema (depends on acquisition)\n",
46+
"└── analysis.py # Defines analysis schema (depends on processing)\n",
47+
"```\n",
48+
"\n",
49+
"### Step-by-Step Implementation\n",
50+
"\n",
51+
"1. **Define** `subject.py`\n",
52+
"This module defines the `subject_management` schema and contains the `Subject` table.\n"
53+
]
54+
},
55+
{
56+
"cell_type": "markdown",
57+
"metadata": {},
58+
"source": []
59+
},
60+
{
61+
"cell_type": "code",
62+
"execution_count": 16,
63+
"metadata": {},
64+
"outputs": [
65+
{
66+
"name": "stdout",
67+
"output_type": "stream",
68+
"text": [
69+
"\u001b[0;32mimport\u001b[0m \u001b[0mdatajoint\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mdj\u001b[0m\u001b[0;34m\u001b[0m\n",
70+
"\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
71+
"\u001b[0;34m\u001b[0m\u001b[0;31m# Define the subject management schema\u001b[0m\u001b[0;34m\u001b[0m\n",
72+
"\u001b[0;34m\u001b[0m\u001b[0mschema\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSchema\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"subject_management\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
73+
"\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
74+
"\u001b[0;34m\u001b[0m\u001b[0;34m@\u001b[0m\u001b[0mschema\u001b[0m\u001b[0;34m\u001b[0m\n",
75+
"\u001b[0;34m\u001b[0m\u001b[0;32mclass\u001b[0m \u001b[0mSubject\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mManual\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
76+
"\u001b[0;34m\u001b[0m \u001b[0mdefinition\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"\"\"\u001b[0m\n",
77+
"\u001b[0;34m subject_id : int\u001b[0m\n",
78+
"\u001b[0;34m ---\u001b[0m\n",
79+
"\u001b[0;34m subject_name : varchar(50)\u001b[0m\n",
80+
"\u001b[0;34m species : varchar(50)\u001b[0m\n",
81+
"\u001b[0;34m \"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n"
82+
]
83+
}
84+
],
85+
"source": [
86+
"%pycat code/subject.py"
2187
]
88+
},
89+
{
90+
"cell_type": "code",
91+
"execution_count": null,
92+
"metadata": {},
93+
"outputs": [],
94+
"source": []
2295
}
2396
],
2497
"metadata": {
2598
"kernelspec": {
26-
"display_name": "Python 3",
99+
"display_name": "base",
27100
"language": "python",
28101
"name": "python3"
29102
},
@@ -37,7 +110,7 @@
37110
"name": "python",
38111
"nbconvert_exporter": "python",
39112
"pygments_lexer": "ipython3",
40-
"version": "3.9.17"
113+
"version": "3.11.10"
41114
},
42115
"orig_nbformat": 4
43116
},
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
import datajoint as dj
2+
3+
# Define the subject management schema
4+
schema = dj.Schema("subject_management")
5+
6+
@schema
7+
class Subject(dj.Manual):
8+
definition = """
9+
subject_id : int
10+
---
11+
subject_name : varchar(50)
12+
species : varchar(50)
13+
"""

0 commit comments

Comments
 (0)