"""Generate mock volunteer CSV data for issue #117.

Writes two files into ``mock_db/`` (relative to the current working
directory; the folder is created if it does not exist):

* ``volunteer_details.csv``   -- one row of fake contact details per user id.
* ``volunteers_assigned.csv`` -- random (request, volunteer) assignments.

Originally added as ``database/mock-data-generation/issue_117.py``.
"""

import json
import os
import random

import pandas as pd
from faker import Faker

fake = Faker()

# -----------------------------
# Config
# -----------------------------
NUM_USERS = 40000
NUM_VOLUNTEERS_ASSIGNED = 25000
NUM_REQUESTS = 2000  # simulate request table

# Set to an int to seed both `random` and Faker so repeated runs draw the
# same values; None (the default) keeps every run random. Dates are generated
# relative to the run date ('today', '-1y', "this year"), so they can still
# differ between runs made on different days.
RANDOM_SEED = None
if RANDOM_SEED is not None:
    random.seed(RANDOM_SEED)
    Faker.seed(RANDOM_SEED)

# -----------------------------
# Output Path
# -----------------------------
output_path = "mock_db"
os.makedirs(output_path, exist_ok=True)  # auto-create folder

# -----------------------------
# Generate Users (Reference)
# -----------------------------
# NOTE(review): this frame is never written to disk; only its user_id column
# is consumed below. The name/email columns are generated but unused here.
users = pd.DataFrame({
    "user_id": range(1, NUM_USERS + 1),
    "name": [fake.name() for _ in range(NUM_USERS)],
    "email": [fake.unique.email() for _ in range(NUM_USERS)],
})

# -----------------------------
# Generate Requests (Reference)
# -----------------------------
# NOTE(review): also never written to disk; only request_id is consumed below.
requests = pd.DataFrame({
    "request_id": range(1, NUM_REQUESTS + 1),
    "request_date": [
        fake.date_between(start_date='-1y', end_date='today')
        for _ in range(NUM_REQUESTS)
    ],
})

# -----------------------------
# Volunteer Details Table
# -----------------------------
# One row per user id, in the same order as the users frame.
volunteer_details = pd.DataFrame({
    "user_id": users["user_id"],
    "phone": [fake.phone_number() for _ in range(NUM_USERS)],
    # Faker addresses are multi-line; flatten them so each CSV row stays on
    # a single line.
    "address": [fake.address().replace("\n", ", ") for _ in range(NUM_USERS)],
    "city": [fake.city() for _ in range(NUM_USERS)],
    "state": [fake.state() for _ in range(NUM_USERS)],
    "created_at": [fake.date_time_this_year() for _ in range(NUM_USERS)],
})

# -----------------------------
# Volunteers Assigned Table
# -----------------------------
# Sample from plain Python lists rather than from the pandas Series directly:
# random.choice(series) indexes the Series by integer *label*, which only
# works while the index is the default 0..N-1 range and is slow per draw.
request_ids = requests["request_id"].tolist()
user_ids = users["user_id"].tolist()

volunteers_assigned = pd.DataFrame({
    "assignment_id": range(1, NUM_VOLUNTEERS_ASSIGNED + 1),
    # Sampling is with replacement: a request may get several volunteers and
    # a volunteer may be assigned to several requests.
    "request_id": random.choices(request_ids, k=NUM_VOLUNTEERS_ASSIGNED),
    "volunteer_id": random.choices(user_ids, k=NUM_VOLUNTEERS_ASSIGNED),
    "assigned_date": [
        fake.date_between(start_date='-1y', end_date='today')
        for _ in range(NUM_VOLUNTEERS_ASSIGNED)
    ],
})

# -----------------------------
# Save CSVs
# -----------------------------
volunteer_details.to_csv(
    os.path.join(output_path, "volunteer_details.csv"), index=False
)
volunteers_assigned.to_csv(
    os.path.join(output_path, "volunteers_assigned.csv"), index=False
)

print(" Mock data generated successfully!")