-
Notifications
You must be signed in to change notification settings - Fork 24
Expand file tree
/
Copy pathtest_serialization.py
More file actions
161 lines (135 loc) · 5.12 KB
/
test_serialization.py
File metadata and controls
161 lines (135 loc) · 5.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#!/usr/bin/env python3
"""
Test script to verify that the transformer model can be serialized and deserialized properly.
"""
import tempfile
import os
import numpy as np
import keras
from ml4h.models.transformer_blocks_embedding import build_embedding_transformer
def test_model_serialization():
    """Check that the embedding transformer survives a save/load round trip.

    Builds a small model with a ``view2id`` mapping, runs a forward pass on
    seeded random inputs, saves the model in ``.keras`` format, reloads it and
    asserts that the reloaded model reproduces the original predictions.

    Raises:
        AssertionError: If, for any output, the maximum absolute difference
            between original and reloaded predictions is not below 1e-6.
    """
    # Model parameters (upper-case names mirror the builder's keyword names)
    INPUT_NUMERIC_COLS = ['feature1', 'feature2', 'feature3']
    REGRESSION_TARGETS = ['target1']
    BINARY_TARGETS = ['target2']
    MAX_LEN = 10
    EMB_DIM = 32
    TOKEN_HIDDEN = 64
    TRANSFORMER_DIM = 128
    NUM_HEADS = 4
    NUM_LAYERS = 2
    DROPOUT = 0.1
    view2id = {'view1': 1, 'view2': 2}

    print("Building model...")
    model = build_embedding_transformer(
        INPUT_NUMERIC_COLS=INPUT_NUMERIC_COLS,
        REGRESSION_TARGETS=REGRESSION_TARGETS,
        BINARY_TARGETS=BINARY_TARGETS,
        MAX_LEN=MAX_LEN,
        EMB_DIM=EMB_DIM,
        TOKEN_HIDDEN=TOKEN_HIDDEN,
        TRANSFORMER_DIM=TRANSFORMER_DIM,
        NUM_HEADS=NUM_HEADS,
        NUM_LAYERS=NUM_LAYERS,
        DROPOUT=DROPOUT,
        view2id=view2id,
    )

    # Seeded generator so a failing run can be reproduced exactly.
    rng = np.random.default_rng(0)
    batch_size = 2
    dummy_input = {
        # Ids are drawn from 0..len(view2id) inclusive.
        # NOTE(review): presumably id 0 is a padding/unknown slot - confirm
        # against build_embedding_transformer.
        'view': rng.integers(0, len(view2id) + 1, size=(batch_size, MAX_LEN)),
        'num': rng.random((batch_size, MAX_LEN, len(INPUT_NUMERIC_COLS))),
        'mask': np.ones((batch_size, MAX_LEN), dtype=bool),
    }

    print("Testing forward pass...")
    predictions = model.predict(dummy_input, verbose=0)
    print(f"Forward pass successful. Output shapes: {[(k, v.shape) for k, v in predictions.items()]}")

    # A temporary directory removes the saved file even when an assertion
    # fails, and avoids saving over a pre-created empty file.
    with tempfile.TemporaryDirectory() as tmp_dir:
        model_path = os.path.join(tmp_dir, "model.keras")

        print(f"Saving model to {model_path}...")
        model.save(model_path)
        print("Model saved successfully!")

        print("Loading model...")
        loaded_model = keras.models.load_model(model_path)
        print("Model loaded successfully!")

        # Test that loaded model works
        print("Testing loaded model forward pass...")
        loaded_predictions = loaded_model.predict(dummy_input, verbose=0)
        print(f"Loaded model forward pass successful. Output shapes: {[(k, v.shape) for k, v in loaded_predictions.items()]}")

        # Compare predictions (should be identical up to float noise)
        for key in predictions:
            diff = np.abs(predictions[key] - loaded_predictions[key]).max()
            print(f"Max difference for {key}: {diff:.2e}")
            assert diff < 1e-6, f"Predictions differ too much for {key}"

        print("✅ Serialization test passed!")
def test_model_without_view2id():
    """Check the save/load round trip for a model built with ``view2id=None``.

    Builds a small model without a view mapping, runs a forward pass on
    seeded random inputs (only ``num`` and ``mask`` are fed), saves the model
    in ``.keras`` format, reloads it and asserts that the reloaded model
    reproduces the original predictions.

    Raises:
        AssertionError: If, for any output, the maximum absolute difference
            between original and reloaded predictions is not below 1e-6.
    """
    # Model parameters (upper-case names mirror the builder's keyword names)
    INPUT_NUMERIC_COLS = ['feature1', 'feature2', 'feature3']
    REGRESSION_TARGETS = ['target1']
    BINARY_TARGETS = ['target2']
    MAX_LEN = 10
    EMB_DIM = 32
    TOKEN_HIDDEN = 64
    TRANSFORMER_DIM = 128
    NUM_HEADS = 4
    NUM_LAYERS = 2
    DROPOUT = 0.1
    view2id = None

    print("\nBuilding model without view2id...")
    model = build_embedding_transformer(
        INPUT_NUMERIC_COLS=INPUT_NUMERIC_COLS,
        REGRESSION_TARGETS=REGRESSION_TARGETS,
        BINARY_TARGETS=BINARY_TARGETS,
        MAX_LEN=MAX_LEN,
        EMB_DIM=EMB_DIM,
        TOKEN_HIDDEN=TOKEN_HIDDEN,
        TRANSFORMER_DIM=TRANSFORMER_DIM,
        NUM_HEADS=NUM_HEADS,
        NUM_LAYERS=NUM_LAYERS,
        DROPOUT=DROPOUT,
        view2id=view2id,
    )

    # Seeded generator so a failing run can be reproduced exactly.
    rng = np.random.default_rng(0)
    batch_size = 2
    dummy_input = {
        'num': rng.random((batch_size, MAX_LEN, len(INPUT_NUMERIC_COLS))),
        'mask': np.ones((batch_size, MAX_LEN), dtype=bool),
    }

    print("Testing forward pass...")
    predictions = model.predict(dummy_input, verbose=0)
    print(f"Forward pass successful. Output shapes: {[(k, v.shape) for k, v in predictions.items()]}")

    # A temporary directory removes the saved file even when an assertion
    # fails, and avoids saving over a pre-created empty file.
    with tempfile.TemporaryDirectory() as tmp_dir:
        model_path = os.path.join(tmp_dir, "model.keras")

        print(f"Saving model to {model_path}...")
        model.save(model_path)
        print("Model saved successfully!")

        print("Loading model...")
        loaded_model = keras.models.load_model(model_path)
        print("Model loaded successfully!")

        print("Testing loaded model forward pass...")
        loaded_predictions = loaded_model.predict(dummy_input, verbose=0)
        print(f"Loaded model forward pass successful. Output shapes: {[(k, v.shape) for k, v in loaded_predictions.items()]}")

        # Loading without error is not enough: verify the restored weights
        # reproduce the original outputs, as the view2id test does.
        for key in predictions:
            diff = np.abs(predictions[key] - loaded_predictions[key]).max()
            print(f"Max difference for {key}: {diff:.2e}")
            assert diff < 1e-6, f"Predictions differ too much for {key}"

        print("✅ Serialization test without view2id passed!")
if __name__ == "__main__":
    # Script entry point: run each serialization check in order; any failure
    # raises before the final success banner is printed.
    for run_check in (test_model_serialization, test_model_without_view2id):
        run_check()
    print("\n🎉 All serialization tests passed!")