# create_model.py
import numpy as np
import tensorflow as tf


def int_to_digit_array(arr, width=15):
    """Convert an array of integers to scaled digit arrays."""
    powers_of_ten = 10 ** np.arange(width, dtype=np.int64)[::-1]
    digit_array = (arr[:, np.newaxis] // powers_of_ten) % 10
    return (digit_array / 9.0).astype(np.float32)  # Scale digits 0-9 to 0.0-1.0
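
# A quick sanity check, worked out by hand from the function above:
#   int_to_digit_array(np.array([42]), width=4)
# returns the digits [0, 0, 4, 2] scaled by 1/9, i.e. ~[[0.0, 0.0, 0.444, 0.222]].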


def data_generator(indices, batch_size, width):
    """
    Generator that yields batches of data.

    Args:
        indices: The specific array of numbers (train or val) to generate from
        batch_size: Number of samples per batch
        width: Number of digits per number
    """
    total = len(indices)
    for i in range(0, total, batch_size):
        batch_indices = indices[i : i + batch_size]
        # Features: the digits of the number
        X_batch = int_to_digit_array(batch_indices, width)
        # Label: 0 if even, 1 if odd
        y_batch = (batch_indices % 2).reshape(-1, 1).astype(np.float32)
        yield X_batch, y_batch
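
# Note: with the 60/40 split below, the train set holds 600_000 samples, so this
# generator yields 585 full batches of 1024 plus a final partial batch of 960;
# that partial batch is why the TensorSpecs below leave the batch dimension as None.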


def main():
    # Constants
    start = 0
    end = 1_000_000
    batch_size = 1024
    epochs = 10
    width = 15

    # --- Dataset Setup ---
    # Generate random integers within the 15-digit range
    all_indices = np.random.randint(0, 10**width, size=(end - start), dtype=np.int64)

    # Simple 60/40 split
    split_point = int(len(all_indices) * 0.6)
    train_indices = all_indices[:split_point]
    val_indices = all_indices[split_point:]

    train_dataset = tf.data.Dataset.from_generator(
        lambda: data_generator(train_indices, batch_size, width),
        output_signature=(
            tf.TensorSpec(shape=(None, width), dtype=tf.float32),
            tf.TensorSpec(shape=(None, 1), dtype=tf.float32)
        )
    ).shuffle(buffer_size=1000).prefetch(tf.data.AUTOTUNE)
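    # Caveat: because the generator yields pre-batched data, shuffle() here
    # reorders whole batches rather than individual samples. Shuffling
    # train_indices up front (or batching inside tf.data) would mix at the
    # sample level instead.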

    val_dataset = tf.data.Dataset.from_generator(
        lambda: data_generator(val_indices, batch_size, width),
        output_signature=(
            tf.TensorSpec(shape=(None, width), dtype=tf.float32),
            tf.TensorSpec(shape=(None, 1), dtype=tf.float32)
        )
    ).prefetch(tf.data.AUTOTUNE)

    # --- Model Definition ---
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(15),
        tf.keras.layers.BatchNormalization(momentum=0.9),
        tf.keras.layers.LeakyReLU(negative_slope=0.01),
        tf.keras.layers.Dense(7),
        tf.keras.layers.BatchNormalization(momentum=0.9),
        tf.keras.layers.LeakyReLU(negative_slope=0.01),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
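    # The task is learnable from a single feature: an integer's parity depends
    # only on its last digit, so the network mainly has to key on the final
    # input column and threshold it.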

    # --- Compile Model ---
    # Use AdamW for decoupled weight decay
    optimizer = tf.keras.optimizers.AdamW(learning_rate=0.002, weight_decay=0.004)
    model.compile(optimizer=optimizer,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
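    # Unlike plain L2 regularization with Adam, AdamW applies weight decay
    # directly to the weights instead of folding it into the gradient update.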

    # --- Training ---
    lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.2,
        patience=2,
        min_lr=0.0001,
        verbose=1
    )
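    # With these settings, whenever val_loss stalls for 2 epochs the learning
    # rate drops by 5x: 0.002 -> 0.0004, then clamps at the 0.0001 floor.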

    history = model.fit(train_dataset, validation_data=val_dataset, epochs=epochs,
                        callbacks=[lr_scheduler], verbose=1)

    # --- Results ---
    val_accuracy = history.history['val_accuracy']
    val_loss = history.history['val_loss']
    print(f"maximum val_accuracy of {max(val_accuracy)} at epoch {val_accuracy.index(max(val_accuracy)) + 1}")
    print(f"minimum val_loss of {min(val_loss)} at epoch {val_loss.index(min(val_loss)) + 1}")

    # --- Save Model ---
    model.save('model.keras')


if __name__ == '__main__':
    main()
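
# To reuse the saved model later (a minimal sketch; load_model and the
# 'model.keras' path both match the save() call above):
#   loaded = tf.keras.models.load_model('model.keras')
#   probs = loaded.predict(int_to_digit_array(np.array([12345]), width=15))
#   print('odd' if probs[0, 0] > 0.5 else 'even')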