Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ install-scispacy:
start:
python ./web/manage.py runserver

populate-db:
import-wikidata:
python ./web/manage.py import_wikidata

clear-db:
Expand All @@ -46,3 +46,6 @@ create-migrations:

migrate:
python ./web/manage.py migrate

import-lmfdb:
python ./web/manage.py import_lmfdb
18 changes: 18 additions & 0 deletions web/concepts/migrations/0020_alter_item_source.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 4.2.28 on 2026-04-22 07:46

from django.db import migrations, models


class Migration(migrations.Migration):
    """Set the choices of ``Item.source``; the list includes the LMFDB entry."""

    dependencies = [
        (
            "concepts",
            "0019_remove_concept_unique_lower_name_concept_normal_name_and_more",
        ),
    ]

    operations = [
        migrations.AlterField(
            model_name="item",
            name="source",
            field=models.CharField(
                choices=[
                    ("Wd", "Wikidata"),
                    ("nL", "nLab"),
                    ("MW", "MathWorld"),
                    ("PW", "ProofWiki"),
                    ("EoM", "Encyclopedia of Mathematics"),
                    ("WpEN", "Wikipedia (English)"),
                    ("AUm", "Agda Unimath"),
                    ("LMF", "The L-functions and modular forms database"),
                ],
                max_length=4,
            ),
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Generated by Django 4.2.28 on 2026-04-24 10:08

from django.db import migrations, models


class Migration(migrations.Migration):
    """Set ``max_length=300`` on ``Item.identifier``, ``Item.name`` and ``Item.url``."""

    dependencies = [
        ("concepts", "0020_alter_item_source"),
    ]

    operations = [
        migrations.AlterField(
            model_name="item",
            name="identifier",
            field=models.CharField(max_length=300),
        ),
        migrations.AlterField(
            model_name="item",
            name="name",
            # ``name`` stays nullable; only the length limit is set here.
            field=models.CharField(max_length=300, null=True),
        ),
        migrations.AlterField(
            model_name="item",
            name="url",
            field=models.URLField(max_length=300),
        ),
    ]
8 changes: 5 additions & 3 deletions web/concepts/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ class Source(models.TextChoices):
ENCYCLOPEDIA_OF_MATHEMATICS = "EoM", "Encyclopedia of Mathematics"
WIKIPEDIA_EN = "WpEN", "Wikipedia (English)"
AGDA_UNIMATH = "AUm", "Agda Unimath"
LMFDB = "LMF", "The L-functions and modular forms database"

@staticmethod
def key():
Expand All @@ -87,16 +88,17 @@ def key():
Item.Source.PROOF_WIKI,
Item.Source.ENCYCLOPEDIA_OF_MATHEMATICS,
Item.Source.AGDA_UNIMATH,
Item.Source.LMFDB,
]
return lambda item: SOURCES.index(item.source)

domain = models.CharField(
max_length=4, choices=Domain.choices, default=Domain.MATHEMATICS
)
source = models.CharField(max_length=4, choices=Source.choices)
identifier = models.CharField(max_length=200)
url = models.URLField(max_length=200)
name = models.CharField(max_length=200, null=True)
identifier = models.CharField(max_length=300)
url = models.URLField(max_length=300)
name = models.CharField(max_length=300, null=True)
description = models.TextField(null=True)
keywords = models.TextField(null=True, blank=True)
article_text = models.TextField(null=True, blank=True)
Expand Down
1 change: 1 addition & 0 deletions web/concepts/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ <h2>About</h2>
<li><a href="https://proofwiki.org/wiki/Main_Page">ProofWiki</a> ({{ number_of_links.proof_wiki }} entries),</li>
<li><a href="https://encyclopediaofmath.org/wiki/Main_Page">Encyclopedia of mathematics</a> ({{ number_of_links.encyclopedia_of_mathematics }} entries),</li>
<li><a href="https://unimath.github.io/agda-unimath/">Agda Unimath</a> ({{ number_of_links.agda_unimath }} entries),</li>
<li><a href="https://www.lmfdb.org/">LMFDB</a> ({{ number_of_links.lmfdb }} entries)</li>
</ul>
<p>The entries are organized into a concept network that connects the same concept appearing in different sources.</p>
<p>The team: <a href="https://katja.not.si">Katja Berčič</a> and Slobodan Stanojevikj.</p>
Expand Down
1 change: 1 addition & 0 deletions web/concepts/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def home(request):
"agda_unimath": Item.objects.filter(
source=Item.Source.AGDA_UNIMATH
).count(),
"lmfdb": Item.objects.filter(source=Item.Source.LMFDB).count(),
},
}
return render(request, "index.html", context)
Expand Down
1 change: 1 addition & 0 deletions web/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ spacy~=3.7.0 --prefer-binary
scispacy~=0.6.2
python-decouple~=3.8
unidecode~=1.4.0
lmfdb-lite[pgbinary] @ git+https://github.com/roed314/lmfdb-lite.git
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

☝️ ...this dependency


# LLM dependencies (optional, install based on which LLM you want to use)
# For paid APIs:
Expand Down
46 changes: 46 additions & 0 deletions web/slurper/management/commands/clear_lmfdb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import logging
import sys
from datetime import timedelta

from concepts.models import Item
from django.core.management.base import BaseCommand
from slurper.models import SlurperRun

# Minimum age of the last successful LMFDB slurper run before the clear command
# proceeds without --force or an interactive confirmation.
# Same value as LmfdbSlurper.MIN_INTERVAL in slurper/source_lmfdb.py; keep the two in sync.
MIN_INTERVAL = timedelta(days=7)


class Command(BaseCommand):
    """Management command that deletes every ``Item`` whose source is LMFDB.

    Unless ``--force`` is given, the command consults ``SlurperRun.can_run``;
    when the throttle applies it asks for confirmation on an interactive
    terminal and otherwise logs a message and does nothing.
    """

    help = (
        "Delete all LMFDB items. Guarded by a 7-day throttle; use --force to override."
    )

    def add_arguments(self, parser):
        """Register the ``--force`` flag that bypasses the throttle check."""
        parser.add_argument(
            "--force",
            action="store_true",
            help="Clear even if the LMFDB slurper ran within the last 7 days.",
        )

    def handle(self, *args, force=False, **options):
        """Delete the LMFDB items, honouring the throttle unless ``force`` is set."""
        lmfdb = Item.Source.LMFDB
        # ``and`` short-circuits, so the throttle table is not queried under --force.
        throttled = not force and not SlurperRun.can_run(lmfdb, MIN_INTERVAL)

        if throttled:
            if not sys.stdin.isatty():
                # Non-interactive run: nobody to ask, so leave the data alone.
                logging.info(
                    f"[{lmfdb.label}] clear skipped: ran less than "
                    f"{MIN_INTERVAL.days} days ago (use --force to override)."
                )
                return

            prompt = (
                f"LMFDB slurper ran within the last {MIN_INTERVAL.days} days. "
                f"Clear anyway? [y/N] "
            )
            reply = input(prompt).strip().lower()
            if reply not in ("y", "yes"):
                logging.info(f"[{lmfdb.label}] clear cancelled.")
                return

        # ``delete()`` returns (total rows deleted, per-model breakdown).
        removed, _per_model = Item.objects.filter(source=lmfdb).delete()
        logging.info(f"[{lmfdb.label}] cleared {removed} items.")
14 changes: 14 additions & 0 deletions web/slurper/management/commands/import_lmfdb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from django.core.management.base import BaseCommand
from slurper import source_lmfdb


class Command(BaseCommand):
    """Management command that imports LMFDB items.

    All work is delegated to ``source_lmfdb.LMFDB_SLURPER.save_items``.
    """

    # Shown by ``manage.py help import_lmfdb``; mirrors the wording used by
    # the sibling ``clear_lmfdb`` command.
    help = (
        "Import items from LMFDB. Guarded by a 7-day throttle; use --force to override."
    )

    def add_arguments(self, parser):
        """Register the ``--force`` flag that bypasses the throttle."""
        parser.add_argument(
            "--force",
            action="store_true",
            help="Bypass the 7-day throttle and run anyway.",
        )

    def handle(self, *args, force=False, **options):
        """Run the LMFDB slurper, forwarding ``force`` unchanged."""
        source_lmfdb.LMFDB_SLURPER.save_items(force=force)
22 changes: 22 additions & 0 deletions web/slurper/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Generated by Django 4.2.28 on 2026-04-24 15:38

from django.db import migrations, models


class Migration(migrations.Migration):
    """Initial migration: create the ``SlurperRun`` table."""

    initial = True

    dependencies = []

    operations = [
        migrations.CreateModel(
            name="SlurperRun",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("source", models.CharField(max_length=8, unique=True)),
                ("last_succeeded_at", models.DateTimeField()),
            ],
        ),
    ]
Empty file.
26 changes: 26 additions & 0 deletions web/slurper/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from datetime import timedelta

from django.db import models
from django.utils import timezone


class SlurperRun(models.Model):
    """Record of when a named slurper last completed successfully.

    ``source`` is unique, so there is at most one row per slurper; the
    timestamp is used to throttle repeated runs.
    """

    source = models.CharField(max_length=8, unique=True)
    last_succeeded_at = models.DateTimeField()

    @classmethod
    def can_run(cls, source: str, min_interval: timedelta) -> bool:
        """Tell whether the slurper for ``source`` is allowed to run now.

        Returns True when no run is recorded for ``source`` or when the
        recorded run is at least ``min_interval`` old.
        """
        try:
            record = cls.objects.get(source=source)
        except cls.DoesNotExist:
            # No recorded run for this source: nothing to throttle.
            return True
        elapsed = timezone.now() - record.last_succeeded_at
        return elapsed >= min_interval

    @classmethod
    def mark_ran(cls, source: str) -> None:
        """Store the current time as the last successful run of ``source``."""
        finished_at = timezone.now()
        cls.objects.update_or_create(
            source=source,
            defaults={"last_succeeded_at": finished_at},
        )
57 changes: 57 additions & 0 deletions web/slurper/source_lmfdb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import logging
from datetime import timedelta

from concepts.models import Item
from django.db.utils import IntegrityError
from psycopg2.sql import SQL
from slurper.models import SlurperRun


# Slurper that reads rows of the LMFDB ``kwl_knowls`` table and stores each one
# as an Item whose source is Item.Source.LMFDB.
class LmfdbSlurper:
# Prefix joined with a row's ``id`` to form the item's URL (see row_to_item).
KNOWL_URL_PREFIX = "https://www.lmfdb.org/knowledge/show/"
# Minimum interval passed to SlurperRun.can_run() before save_items runs without force.
MIN_INTERVAL = timedelta(days=7)

def __init__(self):
self.source = Item.Source.LMFDB

# Generator: yields one dict per row of kwl_knowls, keyed by the column names
# taken from the cursor description (id, title, content).
def fetch_rows(self):
# Imported inside the method, so ``lmf`` is only loaded when rows are fetched.
# NOTE(review): presumably to keep module import free of the LMFDB dependency — confirm.
from lmf import db
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added an import for this, seems that the correct library is imported: https://github.com/roed314/lmfdb-lite


# NOTE(review): ``_execute`` is underscore-prefixed, i.e. private by convention
# in the ``lmf`` package — confirm it is safe to rely on.
cur = db._execute(SQL("SELECT id, title, content FROM kwl_knowls"))
columns = [desc[0] for desc in cur.description]
for row in cur:
yield dict(zip(columns, row))

# Build (but do not save) an Item from one row dict produced by fetch_rows.
def row_to_item(self, row) -> Item:
return Item(
source=self.source,
identifier=row["id"],
url=self.KNOWL_URL_PREFIX + row["id"],
name=row["title"],
description=row["content"],
)

# Save every fetched row as an Item and record the run.
# force: when True, the SlurperRun throttle check is not performed.
def save_items(self, force: bool = False):
if not force and not SlurperRun.can_run(self.source, self.MIN_INTERVAL):
logging.info(
f"[{self.source.label}] skipped: ran less than "
f"{self.MIN_INTERVAL.days} days ago (use --force to override)."
)
return
total_saved = 0
for row in self.fetch_rows():
item = self.row_to_item(row)
try:
item.save()
total_saved += 1
# An IntegrityError is logged as "already in the database" and the row is not counted.
except IntegrityError:
logging.info(
f"Item {item.source} {item.identifier} is already in the database."
)
# Reached only after the loop completes; any other exception raised above
# propagates before the run is recorded.
SlurperRun.mark_ran(self.source)
logging.info(
f"[{self.source.label}] save_items finished: {total_saved} items saved."
)


# Module-level instance; the import_lmfdb management command calls its save_items().
LMFDB_SLURPER = LmfdbSlurper()
12 changes: 12 additions & 0 deletions web/web/management/commands/rebuild_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ class Command(BaseCommand):
def handle(self, *args, **options):
print("clearing data: agda-unimath")
call_command("clear_agda_unimath")
print(
"clearing data: LMFDB "
"(skipped if the LMFDB slurper ran in the last 7 days; "
"use `clear_lmfdb --force` to override)"
)
call_command("clear_lmfdb")
print("clearing data: Wikidata")
call_command("clear_wikidata")
print("clearing data: concepts")
Expand All @@ -15,6 +21,12 @@ def handle(self, *args, **options):
call_command("import_wikidata")
print("importing data: agda-unimath")
call_command("import_agda_unimath")
print(
"importing data: LMFDB "
"(skipped if the LMFDB slurper ran in the last 7 days; "
"use `import_lmfdb --force` to override)"
)
call_command("import_lmfdb")
print("linking: items with the same name")
call_command("link_same")
print("computing concepts")
Expand Down