|
42 | 42 |
|
43 | 43 | import keras_remote |
44 | 44 |
|
| 45 | + |
# Example 1: CPU-only execution (works with default cluster)
@keras_remote.run(accelerator="cpu")
def simple_computation(x, y):
    """Add *x* and *y* on a remote CPU worker and return the sum."""
    result = x + y
    # Log the computation so remote-execution output is visible in the job logs.
    message = f"Computing {x} + {y} = {result}"
    print(message)
    return result
52 | 53 |
|
53 | 54 |
|
# Example 2: Keras model training on CPU
@keras_remote.run(accelerator="cpu")
def train_simple_model_cpu():
    """Train a simple Keras model on remote CPU."""

    # Build the network layer by layer instead of passing a list literal;
    # the resulting model is identical.
    model = keras.Sequential()
    model.add(keras.layers.Dense(64, activation="relu", input_shape=(10,)))
    model.add(keras.layers.Dense(64, activation="relu"))
    model.add(keras.layers.Dense(1))

    model.compile(optimizer="adam", loss="mse")

    # Synthetic regression data: 1000 samples, 10 features, 1 target each.
    x_train = np.random.randn(1000, 10)
    y_train = np.random.randn(1000, 1)

    print("Training model on CPU...")
    history = model.fit(x_train, y_train, epochs=5, batch_size=32, verbose=1)

    # Report and return the loss recorded for the final epoch.
    final_loss = history.history["loss"][-1]
    print(f"Final loss: {history.history['loss'][-1]}")
    return final_loss
80 | 81 |
|
81 | 82 |
|
# Example 3: GPU training (requires GPU node pool)
@keras_remote.run(accelerator="nvidia-tesla-t4")
def train_model_gpu():
    """Train a Keras model on remote GPU. Requires T4 GPU node pool."""
    # Same architecture as the CPU example but wider (128 units) and with
    # 20 input features, assembled incrementally.
    model = keras.Sequential()
    model.add(keras.layers.Dense(128, activation="relu", input_shape=(20,)))
    model.add(keras.layers.Dense(128, activation="relu"))
    model.add(keras.layers.Dense(1))

    model.compile(optimizer="adam", loss="mse")

    # Synthetic regression data: 5000 samples, 20 features, 1 target each.
    x_train = np.random.randn(5000, 20)
    y_train = np.random.randn(5000, 1)

    print("Training model on T4 GPU...")
    history = model.fit(x_train, y_train, epochs=10, batch_size=64, verbose=1)

    # Return the loss recorded for the final epoch.
    return history.history["loss"][-1]
103 | 104 |
|
104 | 105 |
|
def main():
    """Run examples."""
    # Hoist the repeated separator so the banner width is defined once.
    banner = "=" * 60

    print(banner)
    print("Keras Remote - GKE Examples")
    print(banner)

    # Example 1: Simple computation (CPU)
    print("\n--- Example 1: Simple Computation (CPU) ---")
    print("Running simple_computation(10, 20) on GKE...")
    result = simple_computation(10, 20)
    print(f"Result: {result}")

    # Example 2: Model training on CPU
    print("\n--- Example 2: Keras Model Training (CPU) ---")
    print("Training a simple model on CPU...")
    final_loss = train_simple_model_cpu()
    print(f"Model trained. Final loss: {final_loss}")

    # Example 3: GPU training (requires GPU node pool)
    # Uncomment to run if you have T4 GPU nodes available
    # print("\n--- Example 3: Model Training on T4 GPU ---")
    # final_loss = train_model_gpu()
    # print(f"Model trained. Final loss: {final_loss}")

    print("\n" + banner)
    print("Examples completed!")
    print(banner)
132 | 133 |
|
133 | 134 |
|
if __name__ == "__main__":
    # Prerequisites:
    # 1. Set KERAS_REMOTE_PROJECT environment variable to your GCP project ID
    # 2. Configure kubectl: gcloud container clusters get-credentials <cluster> --zone <zone>
    # 3. Ensure your GKE cluster has GPU nodes with the required accelerator type
    if not os.environ.get("KERAS_REMOTE_PROJECT"):
        print("ERROR: KERAS_REMOTE_PROJECT environment variable not set")
        print("Please set it to your GCP project ID:")
        print("  export KERAS_REMOTE_PROJECT=your-project-id")
        # Use `raise SystemExit` rather than the site-module `exit()` builtin,
        # which is not guaranteed to exist (e.g. under `python -S` or in
        # frozen/embedded interpreters). Behavior is identical: exit code 1.
        raise SystemExit(1)

    # Verify kubectl is configured and the cluster is reachable before
    # dispatching any remote work. The 10-second timeout keeps a hung
    # connection from blocking the script indefinitely.
    try:
        result = subprocess.run(
            ["kubectl", "cluster-info"], capture_output=True, text=True, timeout=10
        )
        if result.returncode != 0:
            print("ERROR: kubectl is not configured or cluster is not accessible")
            print("Please configure kubectl:")
            print(
                "  gcloud container clusters get-credentials <cluster-name> --zone <zone>"
            )
            raise SystemExit(1)
    except FileNotFoundError:
        # kubectl binary is not on PATH at all.
        print("ERROR: kubectl not found. Please install kubectl.")
        raise SystemExit(1)
    except subprocess.TimeoutExpired:
        # The cluster endpoint did not answer within the 10-second window.
        print("ERROR: kubectl timed out. Check your cluster connectivity.")
        raise SystemExit(1)

    main()
0 commit comments