Skip to content

Commit b0ddc9c

Browse files
Tony Qiu
authored and committed
updated find_similar_events to only show similar upcoming events
1 parent a1245e5 commit b0ddc9c

File tree

3 files changed

+53
-46
lines changed

3 files changed

+53
-46
lines changed

backend/example/embedding_utils.py

Lines changed: 26 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -8,32 +8,35 @@
88

99

1010
def find_similar_events(
11-
embedding: list[float], threshold: float = 0.985, limit: int = None
11+
embedding: list[float], threshold: float = 0.985, limit: int = None, min_date: str = None
1212
) -> list[dict]:
1313
with connection.cursor() as cursor:
14+
# Base query with date filtering first for performance
15+
base_query = """
16+
SELECT id, 1 - (embedding <=> %s::vector) as similarity
17+
FROM events
18+
WHERE embedding IS NOT NULL
19+
"""
20+
21+
params = [embedding]
22+
23+
# Add date filter if provided to reduce search space
24+
if min_date:
25+
base_query += " AND date >= %s"
26+
params.append(min_date)
27+
28+
# Add similarity threshold
29+
base_query += " AND 1 - (embedding <=> %s::vector) > %s"
30+
params.extend([embedding, threshold])
31+
32+
# Order and limit
33+
base_query += " ORDER BY similarity DESC"
34+
1435
if limit is not None:
15-
cursor.execute(
16-
"""
17-
SELECT id, 1 - (embedding <=> %s::vector) as similarity
18-
FROM events
19-
WHERE embedding IS NOT NULL
20-
AND 1 - (embedding <=> %s::vector) > %s
21-
ORDER BY similarity DESC
22-
LIMIT %s
23-
""",
24-
[embedding, embedding, threshold, limit],
25-
)
26-
else:
27-
cursor.execute(
28-
"""
29-
SELECT id, 1 - (embedding <=> %s::vector) as similarity
30-
FROM events
31-
WHERE embedding IS NOT NULL
32-
AND 1 - (embedding <=> %s::vector) > %s
33-
ORDER BY similarity DESC
34-
""",
35-
[embedding, embedding, threshold],
36-
)
36+
base_query += " LIMIT %s"
37+
params.append(limit)
38+
39+
cursor.execute(base_query, params)
3740

3841
return [
3942
{"id": row[0], "similarity": float(row[1])} for row in cursor.fetchall()

backend/scraping/instagram_feed.py

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,10 @@
1919
from dotenv import load_dotenv
2020
from instaloader import Instaloader
2121

22-
from example.embedding_utils import is_duplicate_event
22+
from example.embedding_utils import is_duplicate_event, find_similar_events
2323
from example.models import Clubs, Events
2424
from services.openai_service import extract_events_from_caption, generate_embedding
25+
from django.db import connection
2526
from services.storage_service import upload_image_from_url
2627

2728
USER_AGENTS = [
@@ -179,16 +180,31 @@ def insert_event_to_db(event_data, club_ig, post_url):
179180
f"Club with handle {club_ig} not found, inserting event with null club_type"
180181
)
181182

182-
# Check duplicates using vector sim
183-
logger.debug(f"Checking duplicates for event with data: {event_data}")
184-
if is_duplicate_event(event_data):
185-
logger.debug(f"Duplicate event found: {event_name} at {event_location}")
186-
return False
187-
188-
# Generate embedding
189183
embedding = generate_embedding(event_data["description"])
190184

191-
# Create event using Django ORM
185+
try:
186+
# Pass event date as min_date to filter out past events first for performance
187+
similar_events = find_similar_events(
188+
embedding, threshold=0.90, limit=10, min_date=event_date
189+
)
190+
candidate_ids = [row["id"] for row in similar_events]
191+
if candidate_ids:
192+
for existing in Events.objects.filter(id__in=candidate_ids, date=event_date):
193+
if (
194+
(existing.location or "") == (event_location or "")
195+
and (existing.start_time or "") == (event_data.get("start_time") or "")
196+
and (
197+
(existing.end_time or None)
198+
== (event_data.get("end_time") or None)
199+
)
200+
):
201+
logger.info(
202+
f"Deleting older duplicate event id={existing.id} before inserting refreshed version"
203+
)
204+
existing.delete()
205+
except Exception as dedup_err:
206+
logger.error(f"Duplicate check via utility failed: {dedup_err}")
207+
192208
Events.objects.create(
193209
club_handle=club_ig,
194210
url=post_url,

frontend/src/hooks/useEvents.ts

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -69,17 +69,12 @@ const fetchEvents = async ({
6969
queryKey: string[];
7070
}): Promise<EventsResponse> => {
7171
const searchTerm = queryKey[1] || "";
72-
const startDate = queryKey[2] || "";
7372

7473
const params = new URLSearchParams();
7574

7675
if (searchTerm) {
7776
params.append("search", searchTerm);
7877
}
79-
80-
if (startDate) {
81-
params.append("start_date", startDate);
82-
}
8378

8479
const queryString = params.toString() ? `?${params.toString()}` : "";
8580
const response = await fetch(
@@ -96,15 +91,10 @@ export function useEvents(view: "grid" | "calendar") {
9691
const [searchParams, setSearchParams] = useSearchParams();
9792
const searchTerm = searchParams.get("search") || "";
9893

99-
// Calculate start date based on view
100-
const startDate = view === "grid"
101-
? new Date().toISOString().split('T')[0] // YYYY-MM-DD format for today
102-
: "";
103-
10494
const hasActiveFilters = searchTerm !== "";
10595

10696
const { data, isLoading, error } = useQuery({
107-
queryKey: ["events", searchTerm, startDate],
97+
queryKey: ["events", searchTerm],
10898
queryFn: fetchEvents,
10999
refetchOnWindowFocus: false,
110100
enabled: hasActiveFilters,
@@ -136,9 +126,7 @@ export function useEvents(view: "grid" | "calendar") {
136126
if (searchTerm) {
137127
title = `${events.length} Found Events - Wat2Do`;
138128
} else {
139-
title = view === "grid"
140-
? `${events.length} Upcoming Events - Wat2Do`
141-
: `${events.length} Total Events - Wat2Do`;
129+
title = `${events.length} Upcoming Events - Wat2Do`;
142130
}
143131

144132
if (!isLoadingData) {

0 commit comments

Comments
 (0)