-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhugging_face.py
More file actions
28 lines (23 loc) · 939 Bytes
/
hugging_face.py
File metadata and controls
28 lines (23 loc) · 939 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import json
import os

from datasets import Dataset
from huggingface_hub import login
jsonl_file = "crime_training_data.jsonl"
# Log in to Hugging Face with an access token (created at huggingface.co)
# read from the HF_TOKEN environment variable. The lookup must be a real
# call, not a quoted string: quoting it would send the literal text
# "os.environ.get('HF_TOKEN')" as the token.
# NOTE(review): if HF_TOKEN is unset, None is passed and huggingface_hub
# applies its own default login flow -- confirm that is acceptable here.
login(token=os.environ.get("HF_TOKEN"))
# Load the JSONL file into a Hugging Face dataset and push it to the Hub
def create_hf_dataset(
    jsonl_file,
    repo_id="your-org/crime-entity-dataset",
    *,
    private=False,
):
    """Create a Hugging Face dataset from a JSONL file and push it to the Hub.

    Args:
        jsonl_file: Path to a UTF-8 encoded JSON Lines file. Each non-blank
            line is parsed with ``json.loads``; blank or whitespace-only
            lines are skipped.
        repo_id: Hub repository the dataset is pushed to. The default is a
            placeholder; replace it with your desired repo name.
        private: Forwarded to ``Dataset.push_to_hub`` as its ``private``
            argument. The default ``False`` matches the value that was
            previously hard-coded.

    Returns:
        The ``Dataset`` built from the parsed records (after it has been
        pushed).

    Raises:
        OSError: If ``jsonl_file`` cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Read the JSONL file
    with open(jsonl_file, "r", encoding="utf-8") as f:
        # Skip blank lines (e.g. an empty line at the end of the file);
        # json.loads would raise on them.
        data = [json.loads(line) for line in f if line.strip()]

    # Create dataset
    dataset = Dataset.from_list(data)

    # Push to hub
    dataset.push_to_hub(repo_id, private=private)
    return dataset
# Create and upload the dataset: create_hf_dataset reads the JSONL file,
# pushes the resulting dataset to the Hub, and returns it.
dataset = create_hf_dataset(jsonl_file)
# Report how many examples the returned dataset contains.
print(f"Dataset uploaded with {len(dataset)} examples")