Skip to content

Commit f8196b4

Browse files
authored
apps/summarization: refactor export (#88)
1 parent e27f0a4 commit f8196b4

18 files changed

Lines changed: 946 additions & 852 deletions

File tree

apps/projects/export_utils.py

Lines changed: 0 additions & 846 deletions
This file was deleted.

apps/projects/management/commands/export_project_data.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
from django.core.management.base import BaseCommand
44
from django.db.models import Q
55

6-
from apps.projects.export_utils import generate_full_export
76
from apps.projects.models import Project
7+
from apps.summarization.export_utils.core import generate_full_export
88

99

1010
class Command(BaseCommand):

apps/projects/utils.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,16 @@
66
from sentry_sdk import capture_exception
77

88
from apps.contrib.models import Settings
9+
from apps.summarization.export_utils.attachments.handlers import (
10+
collect_document_attachments,
11+
)
12+
from apps.summarization.export_utils.attachments.handlers import (
13+
integrate_document_summaries,
14+
)
15+
from apps.summarization.export_utils.core import generate_full_export
916
from apps.summarization.pydantic_models import ProjectSummaryResponse
1017
from apps.summarization.services import AIService
1118

12-
from .export_utils import collect_document_attachments
13-
from .export_utils import generate_full_export
14-
from .export_utils import integrate_document_summaries
15-
1619
logger = logging.getLogger(__name__)
1720

1821

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
def _make_absolute_url(attachment_url, request=None, base_url=None):
    """Build an absolute URL for ``attachment_url``.

    ``request`` takes precedence: when it is given, the result is whatever
    ``request.build_absolute_uri(attachment_url)`` returns. Otherwise
    ``base_url`` (with trailing slashes removed) is combined with the
    attachment path so that exactly one slash separates the two parts.

    Args:
        attachment_url: Attachment location; either a path (with or without
            a leading slash) or an already absolute ``http(s)://`` URL.
        request: Optional object exposing ``build_absolute_uri()``.
        base_url: Optional base URL, only consulted when ``request`` is None.

    Returns:
        The absolute URL as a string, or None when neither ``request`` nor a
        non-empty ``base_url`` is available.
    """
    if request is not None:
        return request.build_absolute_uri(attachment_url)
    if not base_url:
        return None

    # An already absolute URL must be passed through untouched; gluing it
    # onto base_url would yield something like "https://host/https://cdn/...".
    if attachment_url.lower().startswith(("http://", "https://")):
        return attachment_url

    base = base_url.rstrip("/")
    # Paths that already start with "/" are kept verbatim; others get one.
    path = attachment_url if attachment_url.startswith("/") else f"/{attachment_url}"
    return f"{base}{path}"
14+
15+
16+
def collect_document_attachments(export_data, request=None, base_url=None):
    """
    Gather the document attachments of the project's information and result fields.

    Each attachment gets a handle of the form ``<source>_attachment_<index>``
    (index counted per field, starting at 0) and is resolved to an absolute
    URL via ``_make_absolute_url``. Attachments that cannot be resolved are
    left out.

    Args:
        export_data: Export dictionary; attachments are read from
            ``export_data["project"]["information_attachments"]`` and
            ``export_data["project"]["result_attachments"]``.
        request: Optional request object used to build absolute URLs.
        base_url: Optional base URL used when ``request`` is None.

    Returns:
        tuple: ``(documents_dict, handle_to_source)``
            - documents_dict: ``{handle: absolute_url, ...}``
            - handle_to_source: ``{handle: "project_information" | "project_result", ...}``
        Both are empty when neither ``request`` nor ``base_url`` is given.
    """
    urls_by_handle = {}
    sources_by_handle = {}

    # Without a request or a base URL no absolute URL can be produced.
    if request is None and not base_url:
        return urls_by_handle, sources_by_handle

    project_section = export_data.get("project", {})

    # (key in the project section, source label); information comes first.
    field_specs = (
        ("information_attachments", "project_information"),
        ("result_attachments", "project_result"),
    )
    for attachments_key, source_label in field_specs:
        attachment_urls = project_section.get(attachments_key, [])
        for position, relative_url in enumerate(attachment_urls):
            resolved = _make_absolute_url(
                relative_url, request=request, base_url=base_url
            )
            if not resolved:
                continue
            handle = f"{source_label}_attachment_{position}"
            urls_by_handle[handle] = resolved
            sources_by_handle[handle] = source_label

    return urls_by_handle, sources_by_handle
61+
62+
63+
def integrate_document_summaries(
    export_data: dict,
    document_summaries: list,
    handle_to_source: dict[str, str],
):
    """
    Attach document summaries to ``export_data``, grouped by project field.

    The result is stored under ``export_data["project"]["document_summaries"]``
    as ``{"information": [...], "result": [...]}``, where each entry is
    ``{"handle": ..., "summary": ...}``. Summaries whose handle does not map
    to a known source are skipped.

    Args:
        export_data: Export dictionary (modified in-place)
        document_summaries: Items exposing ``handle`` and ``summary`` attributes
        handle_to_source: Mapping from handle to source field
            ("project_information", "project_result")
    """
    information_entries = []
    result_entries = []

    # Route each source label to the list that collects its summaries.
    bucket_for_source = {
        "project_information": information_entries,
        "project_result": result_entries,
    }

    for item in document_summaries:
        item_handle = item.handle
        bucket = bucket_for_source.get(handle_to_source.get(item_handle))
        if bucket is None:
            continue
        bucket.append({"handle": item_handle, "summary": item.summary})

    # Create the project section on demand, then store the grouped summaries.
    project_section = export_data.setdefault("project", {})
    project_section["document_summaries"] = {
        "information": information_entries,
        "result": result_entries,
    }
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
from adhocracy4.polls.models import Poll
2+
from apps.budgeting.models import Proposal
3+
from apps.debate.models import Subject
4+
from apps.documents.models import Chapter
5+
from apps.ideas.models import Idea
6+
from apps.mapideas.models import MapIdea
7+
from apps.offlineevents.models import OfflineEvent
8+
from apps.topicprio.models import Topic
9+
10+
from .models.debates import export_debate
11+
from .models.documents import export_document_chapters
12+
from .models.ideas import export_idea
13+
from .models.mapideas import export_mapidea
14+
from .models.offline_events import export_offline_event
15+
from .models.polls import export_poll
16+
from .models.proposals import export_proposal
17+
from .models.topics import export_topic
18+
from .processing.cleaning import clean_export
19+
from .processing.extractors import extract_attachments
20+
from .processing.grouping import restructure_by_phase
21+
from .processing.module_utils import get_module_status
22+
from .processing.module_utils import get_module_type_from_name
23+
24+
25+
def generate_full_export(project):
    """Generate complete project export data - module first approach.

    Builds a dictionary with three sections and post-processes it:

    - ``project``: name, description, information, result, slug,
      organisation name and URL, plus the attachments extracted from the
      description, information and result texts.
    - ``modules``: one entry per non-draft module of the project with its
      metadata and a ``content`` dict. A content key (``ideas``,
      ``mapideas``, ``polls``, ``topics``, ``proposals``, ``debates``,
      ``documents``) is only present when the module has such objects.
    - ``offline_events``: all offline events of the project.

    Args:
        project: The project instance to export.

    Returns:
        The export data after it has been passed through
        ``restructure_by_phase()`` and then ``clean_export()``.
    """
    # Imported at call time, as in the original implementation.
    from adhocracy4.modules.models import Module

    # Project metadata
    project_data = {
        "name": project.name,
        "description": project.description,
        "description_attachments": extract_attachments(project.description),
        "information": getattr(project, "information", None),
        "information_attachments": extract_attachments(
            getattr(project, "information", "")
        ),
        "slug": project.slug,
        "organisation": project.organisation.name,
        "result": project.result,
        "result_attachments": extract_attachments(project.result),
        "url": project.get_absolute_url(),
    }

    modules_data = []
    for module in Module.objects.filter(project=project, is_draft=False):
        module_data = {
            "module_id": module.id,
            "module_name": module.name,
            "module_type": get_module_type_from_name(module.name),
            "active_status": get_module_status(module),
            "module_start": str(module.module_start),
            "module_end": str(module.module_end),
            "description": module.description,
            "url": module.get_absolute_url(),
            "content": {},
        }
        content = module_data["content"]

        # Ideas
        ideas = Idea.objects.filter(module=module)
        if ideas.exists():
            content["ideas"] = [export_idea(i) for i in ideas]

        # MapIdeas
        # Restricted to the current module like every other content type.
        # Filtering by project here would copy all map ideas of the project
        # into each of its modules.
        mapideas = (
            MapIdea.objects.filter(module=module)
            .select_related("category")
            .prefetch_related("labels")
        )
        if mapideas.exists():
            content["mapideas"] = [export_mapidea(m) for m in mapideas]

        # Polls
        polls = Poll.objects.filter(module=module).prefetch_related(
            "questions__choices__votes__other_vote",
        )
        if polls.exists():
            content["polls"] = [export_poll(p) for p in polls]

        # Topics
        topics = (
            Topic.objects.filter(module=module)
            .select_related("category")
            .prefetch_related("labels")
        )
        if topics.exists():
            content["topics"] = [export_topic(t) for t in topics]

        # Proposals
        proposals = (
            Proposal.objects.filter(module=module)
            .select_related("category")
            .prefetch_related("labels")
        )
        if proposals.exists():
            content["proposals"] = [export_proposal(p) for p in proposals]

        # Debates
        debates = Subject.objects.filter(module=module)
        if debates.exists():
            content["debates"] = [export_debate(d) for d in debates]

        # Documents
        if Chapter.objects.filter(module=module).exists():
            content["documents"] = export_document_chapters(module)

        modules_data.append(module_data)

    # Offline events belong to the project, not to a single module.
    offline_events = [
        export_offline_event(event)
        for event in OfflineEvent.objects.filter(project=project)
    ]

    export_data = {
        "project": project_data,
        "modules": modules_data,
        "offline_events": offline_events,
    }

    structured_result = restructure_by_phase(export_data)
    cleaned_result = clean_export(structured_result)
    return cleaned_result
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from ..processing.extractors import extract_comments
2+
3+
4+
def export_debate(debate):
    """Serialize one debate subject, including its comments, into a dict."""
    exported = {
        "id": debate.id,
        "name": debate.name,
        "description": debate.description,
        # "created": debate.created.isoformat(),
        "reference_number": debate.reference_number,
        "slug": debate.slug,
    }

    # Comment statistics and the extracted comments themselves.
    exported["comment_count"] = debate.comments.count()
    exported["comments"] = extract_comments(debate.comments.all())
    exported["comment_creator_count"] = debate.comment_creator_count
    return exported
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
from apps.documents.models import Chapter
2+
3+
from ..processing.extractors import extract_attachments
4+
from ..processing.extractors import extract_comments
5+
6+
7+
def export_paragraph(paragraph):
    """Serialize one paragraph, its attachments and its comments into a dict."""
    # The text is exported as a plain string and also scanned for attachments.
    text = str(paragraph.text)
    paragraph_comments = paragraph.comments

    exported = {
        "id": paragraph.id,
        "name": paragraph.name,
        "text": text,
        "attachments": extract_attachments(text),
        "weight": paragraph.weight,
        # "created": paragraph.created.isoformat(),
    }
    exported["comment_count"] = paragraph_comments.count()
    exported["comments"] = extract_comments(paragraph_comments.all())
    return exported
19+
20+
21+
def export_document_chapters(module):
    """Serialize every chapter of ``module`` (ordered by weight) with its paragraphs.

    Returns a list with one dict per chapter. Each dict carries the chapter's
    metadata, the ids of the previous/next chapter (None when there is none),
    its paragraphs ordered by weight, and its comments.
    """
    ordered_chapters = Chapter.objects.filter(module=module).order_by("weight")

    return [
        {
            "id": chapter.id,
            "name": chapter.name,
            "url": chapter.get_absolute_url(),
            "weight": chapter.weight,
            # "created": chapter.created.isoformat(),
            "prev_chapter_id": None if not chapter.prev else chapter.prev.id,
            "next_chapter_id": None if not chapter.next else chapter.next.id,
            "paragraph_count": chapter.paragraphs.count(),
            "paragraphs": list(
                map(export_paragraph, chapter.paragraphs.all().order_by("weight"))
            ),
            "chapter_comment_count": chapter.comments.count(),
            "chapter_comments": extract_comments(chapter.comments.all()),
        }
        for chapter in ordered_chapters
    ]
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
from ..processing.extractors import extract_attachments
2+
from ..processing.extractors import extract_comments
3+
from ..processing.extractors import extract_ratings
4+
5+
6+
def export_idea(idea):
    """Serialize one idea with its category, labels, comments, ratings and images."""
    # The description is exported as a plain string and scanned for attachments.
    description_text = str(idea.description)

    exported = {
        "id": idea.id,
        "name": idea.name,
        "description": description_text,
        "attachments": extract_attachments(description_text),
        # "created": idea.created.isoformat(),
        "reference_number": idea.reference_number,
    }

    # Category is optional; labels are exported by name.
    exported["category"] = None if not idea.category else idea.category.name
    exported["labels"] = [label.name for label in idea.labels.all()]

    exported["comment_count"] = idea.comments.count()
    exported["comments"] = extract_comments(idea.comments.all())

    exported["rating_count"] = idea.ratings.count()
    exported["ratings"] = extract_ratings(idea.ratings.all())

    exported["images"] = [image.name for image in idea._a4images_current_images]
    return exported
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
from .ideas import export_idea
2+
3+
4+
def export_mapidea(mapidea):
    """Export a single map idea with all its data.

    The result is the dict produced by ``export_idea()`` extended with:

    - ``point``: ``{"lat": ..., "lng": ...}`` or None when the map idea has
      no (truthy) point or the point has an unsupported shape.
    - ``point_label``: the map idea's ``point_label``.
    """
    data = export_idea(mapidea)  # Reuse base idea export

    # Handle point - could be an object with x/y attributes or a dict
    point = None
    raw_point = mapidea.point
    if raw_point:
        if hasattr(raw_point, "y"):  # Point-like object
            point = {
                "lat": raw_point.y,
                "lng": raw_point.x,
            }
        elif isinstance(raw_point, dict):  # Already a dict
            # Fall back to the alternative key only when the first one is
            # missing/None; a coordinate of 0 is valid and must not be
            # treated as absent.
            lat = raw_point.get("y")
            if lat is None:
                lat = raw_point.get("lat")
            lng = raw_point.get("x")
            if lng is None:
                lng = raw_point.get("lng")
            # NOTE(review): dicts without y/lat and x/lng keys (e.g. a
            # GeoJSON-shaped value) export as None coordinates - confirm
            # whether such shapes occur.
            point = {"lat": lat, "lng": lng}

    data.update(
        {
            "point": point,
            "point_label": mapidea.point_label,
        }
    )

    return data
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from ..processing.extractors import extract_attachments
2+
3+
4+
def export_offline_event(event):
    """Serialize one offline event, including attachments found in its description."""
    # The description is exported as a plain string and scanned for attachments.
    description_text = str(event.description)

    exported = {
        "id": event.id,
        "name": event.name,
        "event_type": event.event_type,
        "date": event.date.isoformat(),
        "description": description_text,
        "attachments": extract_attachments(description_text),
        "slug": event.slug,
        "url": event.get_absolute_url(),
        # NOTE(review): read without call parentheses - presumably a
        # property on the event; confirm.
        "timeline_index": event.get_timeline_index,
        # "created": event.created.isoformat(),
    }

    # The modification timestamp may be unset.
    exported["modified"] = None if not event.modified else event.modified.isoformat()
    return exported

0 commit comments

Comments
 (0)