Skip to content

Commit 7b1479f

Browse files
committed
add lineage tree rebuild script
1 parent df9678f commit 7b1479f

File tree

1 file changed

+75
-0
lines changed

1 file changed

+75
-0
lines changed
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
from __future__ import annotations
2+
3+
from django.core.management.base import BaseCommand
4+
from django.db import transaction
5+
6+
from proteins.models import Lineage
7+
8+
9+
class Command(BaseCommand):
    """Rebuild the MPTT tree bookkeeping for the ``Lineage`` model.

    The command first reports the number of lineage nodes and distinct
    trees, and lists nodes whose ``tree_id`` differs from their parent's
    ``tree_id``.  Unless ``--dry-run`` is given it then calls
    ``Lineage.objects.rebuild()`` inside a transaction and re-runs the same
    consistency check so that the final report reflects the actual state of
    the table rather than an assumption.
    """

    help = "Rebuild the MPTT tree structure for Lineage model to fix any corruption"

    # Maximum number of inconsistent nodes printed individually in the report.
    _MAX_LISTED_NODES = 10

    def add_arguments(self, parser):
        """Register the ``--dry-run`` flag on the command's argument parser."""
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help="Show what would be done without making changes",
        )

    @staticmethod
    def _count_trees():
        """Return the number of distinct ``tree_id`` values among Lineage rows."""
        # Let the database count distinct values instead of loading every
        # tree_id into Python.  order_by() drops any default ordering so it
        # cannot add columns to the DISTINCT clause.
        return Lineage.objects.order_by().values("tree_id").distinct().count()

    @staticmethod
    def _find_inconsistent_nodes():
        """Return one description per node whose tree_id differs from its parent's.

        Only nodes that have a parent are examined.
        """
        # The report reads both the node's and the parent's protein name, so
        # fetch those relations up front rather than issuing extra queries
        # for every reported node.
        candidates = Lineage.objects.exclude(parent=None).select_related("protein", "parent__protein")
        return [
            f" - {lineage.protein.name} (ID {lineage.id}, "
            f"tree_id={lineage.tree_id}) has parent "
            f"{lineage.parent.protein.name} (ID {lineage.parent.id}, "
            f"tree_id={lineage.parent.tree_id})"
            for lineage in candidates
            if lineage.tree_id != lineage.parent.tree_id
        ]

    def _report_inconsistencies(self, corrupt_nodes):
        """Print a summary of *corrupt_nodes*, listing at most the first few."""
        if not corrupt_nodes:
            self.stdout.write(self.style.SUCCESS("\n No tree_id inconsistencies detected."))
            return

        limit = self._MAX_LISTED_NODES
        self.stdout.write(self.style.WARNING(f"\nFound {len(corrupt_nodes)} nodes with tree_id inconsistencies:"))
        for node in corrupt_nodes[:limit]:
            self.stdout.write(self.style.WARNING(node))
        if len(corrupt_nodes) > limit:
            self.stdout.write(self.style.WARNING(f" ... and {len(corrupt_nodes) - limit} more"))

    def handle(self, *args, **options):
        """Analyze the Lineage tree and, unless ``--dry-run`` is set, rebuild it.

        Any exception raised by the rebuild is reported on stderr and then
        re-raised unchanged.
        """
        dry_run = options["dry_run"]

        self.stdout.write("Analyzing current Lineage tree structure...")

        # Statistics before the rebuild.
        total_lineages = Lineage.objects.count()
        num_trees = self._count_trees()

        self.stdout.write(f" Total lineage nodes: {total_lineages}")
        self.stdout.write(f" Number of distinct trees: {num_trees}")

        # Potential corruption: nodes whose parent lives in a different tree.
        corrupt_nodes = self._find_inconsistent_nodes()
        self._report_inconsistencies(corrupt_nodes)

        if dry_run:
            self.stdout.write("\n" + self.style.WARNING("DRY RUN: No changes will be made."))
            return

        self.stdout.write("\nRebuilding MPTT tree structure...")

        # Keep the try body limited to the operation that can fail, and send
        # the failure message to stderr so it is not mixed into normal output.
        try:
            with transaction.atomic():
                Lineage.objects.rebuild()
        except Exception as e:
            self.stderr.write(self.style.ERROR(f"\n✗ Rebuild failed: {e}"))
            raise

        # Statistics after the rebuild.
        new_num_trees = self._count_trees()

        self.stdout.write(self.style.SUCCESS("\n✓ MPTT tree rebuild completed successfully!"))
        self.stdout.write(f"\n Trees before: {num_trees}")
        self.stdout.write(f" Trees after: {new_num_trees}")

        # Re-run the check instead of assuming the rebuild fixed everything.
        remaining = self._find_inconsistent_nodes()
        if remaining:
            self.stdout.write(
                self.style.WARNING(f"\n {len(remaining)} tree_id inconsistencies remain after rebuild")
            )
        elif corrupt_nodes:
            self.stdout.write(self.style.SUCCESS(f"\n Fixed {len(corrupt_nodes)} tree_id inconsistencies"))

0 commit comments

Comments
 (0)