-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathremove_repair_quotes.py
More file actions
32 lines (25 loc) · 1.05 KB
/
remove_repair_quotes.py
File metadata and controls
32 lines (25 loc) · 1.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
"""Remove repair quotes from existing claims (pre-adjudication cleanup)."""
import json
import glob
import os
def is_repair_quote(doc):
    """Return True if *doc* is a repair-quote document.

    A document counts as a repair quote when its ``document_type`` contains
    the word "repair" in any letter case.  A missing, null, or non-string
    ``document_type`` is treated as "not a repair quote" instead of raising.
    """
    doc_type = doc.get("document_type")
    if not isinstance(doc_type, str):
        return False
    # Upper-casing once covers "Repair", "REPAIR", "repair", etc.; a separate
    # case-sensitive check for "Repair" would be redundant.
    return "REPAIR" in doc_type.upper()


claim_files = glob.glob("data/synthetic/CLM-*.json")
removed_count = 0

for claim_file in claim_files:
    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(claim_file, 'r', encoding="utf-8") as f:
        claim_data = json.load(f)

    # Tolerate claims with no "documents" key (or a null value): there is
    # simply nothing to remove from them.
    documents = claim_data.get("documents") or []
    kept_documents = [doc for doc in documents if not is_repair_quote(doc)]
    removed = len(documents) - len(kept_documents)

    # Only rewrite files that actually changed, so untouched claims keep
    # their original bytes and modification time.
    if removed > 0:
        claim_data["documents"] = kept_documents
        removed_count += removed

        # Save updated claim
        with open(claim_file, 'w', encoding="utf-8") as f:
            json.dump(claim_data, f, indent=2, default=str)

        print(f"Removed {removed} repair quote(s) from {os.path.basename(claim_file)}")

print(f"\nTotal repair quotes removed: {removed_count}")
print("Existing claims cleaned up for pre-adjudication stage.")