# create_model.py
import numpy as np
import tensorflow as tf


def int_to_digit_array(arr, width=15):
    """Convert an array of integers to scaled digit arrays."""
    powers_of_ten = 10 ** np.arange(width, dtype=np.int64)[::-1]
    digit_array = (arr[:, np.newaxis] // powers_of_ten) % 10
    return (digit_array / 9.0).astype(np.float32)  # Scale digits 0-9 to 0.0-1.0
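
# A quick sanity check, worked out by hand from the function above:
#   int_to_digit_array(np.array([42]), width=4)
# returns the digits [0, 0, 4, 2] scaled by 1/9, i.e. ~[[0.0, 0.0, 0.444, 0.222]].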


def data_generator(indices, batch_size, width):
    """
    Generator that yields batches of data.

    Args:
        indices: The specific array of numbers (train or val) to generate from
        batch_size: Number of samples per batch
        width: Number of digits per number
    """
    total = len(indices)
    for i in range(0, total, batch_size):
        batch_indices = indices[i : i + batch_size]
        # Features: the digits of the number
        X_batch = int_to_digit_array(batch_indices, width)
        # Label: 0 if even, 1 if odd
        y_batch = (batch_indices % 2).reshape(-1, 1).astype(np.float32)
        yield X_batch, y_batch
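
# Note: with the 60/40 split below, the train set holds 600_000 samples, so this
# generator yields 585 full batches of 1024 plus a final partial batch of 960;
# that partial batch is why the TensorSpecs below leave the batch dimension as None.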


def main():
    # Constants
    start = 0
    end = 1_000_000
    batch_size = 1024
    epochs = 10
    width = 15

    # --- Dataset Setup ---
    # Generate random integers within the 15-digit range
    all_indices = np.random.randint(0, 10**width, size=(end - start), dtype=np.int64)

    # Simple 60/40 split
    split_point = int(len(all_indices) * 0.6)
    train_indices = all_indices[:split_point]
    val_indices = all_indices[split_point:]

    train_dataset = tf.data.Dataset.from_generator(
        lambda: data_generator(train_indices, batch_size, width),
        output_signature=(
            tf.TensorSpec(shape=(None, width), dtype=tf.float32),
            tf.TensorSpec(shape=(None, 1), dtype=tf.float32)
        )
    ).shuffle(buffer_size=1000).prefetch(tf.data.AUTOTUNE)
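    # Caveat: because the generator yields pre-batched data, shuffle() here
    # reorders whole batches rather than individual samples. Shuffling
    # train_indices up front (or batching inside tf.data) would mix at the
    # sample level instead.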

    val_dataset = tf.data.Dataset.from_generator(
        lambda: data_generator(val_indices, batch_size, width),
        output_signature=(
            tf.TensorSpec(shape=(None, width), dtype=tf.float32),
            tf.TensorSpec(shape=(None, 1), dtype=tf.float32)
        )
    ).prefetch(tf.data.AUTOTUNE)

    # --- Model Definition ---
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(15),
        tf.keras.layers.BatchNormalization(momentum=0.9),
        tf.keras.layers.LeakyReLU(negative_slope=0.01),
        tf.keras.layers.Dense(7),
        tf.keras.layers.BatchNormalization(momentum=0.9),
        tf.keras.layers.LeakyReLU(negative_slope=0.01),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
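    # The task is learnable from a single feature: an integer's parity depends
    # only on its last digit, so the network mainly has to key on the final
    # input column and threshold it.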

    # --- Compile Model ---
    # Use AdamW for decoupled weight decay
    optimizer = tf.keras.optimizers.AdamW(learning_rate=0.002, weight_decay=0.004)
    model.compile(optimizer=optimizer,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
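    # Unlike plain L2 regularization with Adam, AdamW applies weight decay
    # directly to the weights instead of folding it into the gradient update.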

    # --- Training ---
    lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.2,
        patience=2,
        min_lr=0.0001,
        verbose=1
    )
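    # With these settings, whenever val_loss stalls for 2 epochs the learning
    # rate drops by 5x: 0.002 -> 0.0004, then clamps at the 0.0001 floor.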

    history = model.fit(train_dataset, validation_data=val_dataset, epochs=epochs,
                        callbacks=[lr_scheduler], verbose=1)

    # --- Results ---
    val_accuracy = history.history['val_accuracy']
    val_loss = history.history['val_loss']
    print(f"maximum val_accuracy of {max(val_accuracy)} at epoch {val_accuracy.index(max(val_accuracy)) + 1}")
    print(f"minimum val_loss of {min(val_loss)} at epoch {val_loss.index(min(val_loss)) + 1}")

    # --- Save Model ---
    model.save('model.keras')


if __name__ == '__main__':
    main()
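
# To reuse the saved model later (a minimal sketch; load_model and the
# 'model.keras' path both match the save() call above):
#   loaded = tf.keras.models.load_model('model.keras')
#   probs = loaded.predict(int_to_digit_array(np.array([12345]), width=15))
#   print('odd' if probs[0, 0] > 0.5 else 'even')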