-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathgenerate-training-text.py
More file actions
38 lines (31 loc) · 1.12 KB
/
generate-training-text.py
File metadata and controls
38 lines (31 loc) · 1.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import random
import os
# Path of the file the generated lines are written to.
output_file = "data/training_text"

alphabet = "abcdefghijklmnopqrstuvwxyz"

# Pool of characters that samples are drawn from: digits and punctuation,
# followed by the lower-case and upper-case alphabet.
# Replace with relevant characters
chars = "1234567890/.,:[]()?!+-#"
chars += alphabet
chars += alphabet.upper()

# Ask how many lines to generate. The default of 100 is used when the answer
# is blank or not an integer (int() raises ValueError), when stdin is closed
# or exhausted (input() raises EOFError), or when the number is negative.
try:
    num_samples = int(input("Input number of lines to generate [Defaults to 100]: "))
except (ValueError, EOFError):
    num_samples = 100
else:
    if num_samples < 0:
        # A negative count would make range() empty and produce an empty file.
        num_samples = 100
def generate_sample(min_length=5, max_length=20, charset=None):
    """Generate a random string of characters.

    Args:
        min_length: Smallest length (inclusive) the sample may have.
        max_length: Largest length (inclusive) the sample may have.
        charset: Sequence of characters to draw from. When ``None`` the
            module-level ``chars`` pool is used.

    Returns:
        A string whose length is picked with ``random.randint`` from
        ``[min_length, max_length]`` and whose characters are each picked
        with ``random.choice`` from ``charset``.

    Raises:
        ValueError: If ``min_length`` is negative or ``max_length`` is
            smaller than ``min_length``.
    """
    if charset is None:
        # Resolved at call time so edits to the module-level pool are honoured.
        charset = chars
    if min_length < 0 or max_length < min_length:
        raise ValueError(
            "invalid length range: min_length={!r}, max_length={!r}".format(
                min_length, max_length
            )
        )

    sample_length = random.randint(min_length, max_length)
    # One random.choice call per character keeps the sequence of random draws
    # the same as before for the default arguments (seeded runs stay stable).
    return ''.join(random.choice(charset) for _ in range(sample_length))
def main():
    """Write ``num_samples`` random lines to ``output_file``.

    If the output file already exists, the user is asked to confirm before it
    is overwritten; any answer other than ``y`` aborts and leaves the file
    untouched. Lines are separated by a newline, with no newline after the
    last line.
    """
    if os.path.exists(output_file):
        # Name the real target path in the prompt so the user knows which
        # file is about to be replaced.
        overwrite = input(
            "{} already exists. Do you want to overwrite it? (y/n): ".format(output_file)
        ).strip().lower()
        if overwrite != 'y':
            print("Aborting operation.")
            return

    # open() does not create missing parent directories, so make sure the
    # target directory exists before writing.
    parent_dir = os.path.dirname(output_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # An explicit encoding keeps the output independent of the platform
    # default if the character pool is customised with non-ASCII characters.
    with open(output_file, "w", encoding="utf-8") as f:
        for index in range(num_samples):
            if index:
                # Separator goes before every line except the first, so the
                # file never ends with a trailing newline.
                f.write("\n")
            f.write(generate_sample())


# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()